summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAshish Pandey <aspandey@redhat.com>2016-06-09 16:19:37 +0530
committerXavier Hernandez <xhernandez@datalab.es>2016-06-10 02:14:37 -0700
commita837357c5c7873bf19155e76bf6c251fa799a605 (patch)
treedfcb2df56d748c971d9abd1e98bf67750bfcb5ea
parent5016cc548d4368b1c180459d6fa8ae012bb21d6e (diff)
cluster/ec: Pass xdata to dht in case of error
Problem: In case of mkdir failure, dht expects error information so that it can act accordingly. Aftre adding bricks and re balance, layout gets changed. Fop "mkdir" with old layout returns EIO. EC gets this error in xdata but does not pass it back to dht. In this case dht will not be able to take corrective action. Solution: Return xdata back to dht Change-Id: I24def8038e6880607689b7b046dc6428f564c6ab BUG: 1344277 Signed-off-by: Ashish Pandey <aspandey@redhat.com> Reviewed-on: http://review.gluster.org/14679 Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Reviewed-by: Xavier Hernandez <xhernandez@datalab.es> Tested-by: Atin Mukherjee <amukherj@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c10
1 files changed, 6 insertions, 4 deletions
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index 68741137619..e181170650d 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -46,12 +46,12 @@ ec_dir_write_cbk (call_frame_t *frame, xlator_t *this,
if (!cbk)
goto out;
- if (op_ret < 0)
- goto out;
-
if (xdata)
cbk->xdata = dict_ref (xdata);
+ if (op_ret < 0)
+ goto out;
+
if (poststat)
cbk->iatt[i++] = *poststat;
@@ -584,12 +584,14 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
+ cbk = fop->answer;
GF_ASSERT(fop->error != 0);
if (fop->cbks.mkdir != NULL)
{
fop->cbks.mkdir(fop->req_frame, fop, fop->xl, -1, fop->error,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL,
+ ((cbk) ? cbk->xdata : NULL));
}
return EC_STATE_LOCK_REUSE;
/> -rw-r--r--xlators/cluster/afr/src/afr-inode-read.h39
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c2001
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h68
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c1266
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h35
-rw-r--r--xlators/cluster/afr/src/afr-open.c233
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c167
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h22
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c1298
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h80
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c1127
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c815
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c458
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h33
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1763
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h66
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c1150
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h48
-rw-r--r--xlators/cluster/afr/src/afr.c323
-rw-r--r--xlators/cluster/afr/src/afr.h513
-rw-r--r--xlators/cluster/afr/src/pump.c372
-rw-r--r--xlators/cluster/afr/src/pump.h29
-rw-r--r--xlators/cluster/dht/src/Makefile.am13
-rw-r--r--xlators/cluster/dht/src/dht-common.c1964
-rw-r--r--xlators/cluster/dht/src/dht-common.h351
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c225
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c96
-rw-r--r--xlators/cluster/dht/src/dht-helper.c496
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c279
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c535
-rw-r--r--xlators/cluster/dht/src/dht-layout.c216
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c182
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h23
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1208
-rw-r--r--xlators/cluster/dht/src/dht-rename.c188
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c565
-rw-r--r--xlators/cluster/dht/src/dht-shared.c758
-rw-r--r--xlators/cluster/dht/src/dht.c497
-rw-r--r--xlators/cluster/dht/src/nufa.c395
-rw-r--r--xlators/cluster/dht/src/switch.c310
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c24
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h21
-rw-r--r--xlators/cluster/ha/src/ha.c32
-rw-r--r--xlators/cluster/ha/src/ha.h24
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c24
-rw-r--r--xlators/cluster/map/src/map-mem-types.h21
-rw-r--r--xlators/cluster/map/src/map.c27
-rw-r--r--xlators/cluster/map/src/map.h22
-rw-r--r--xlators/cluster/stripe/src/Makefile.am11
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c675
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h28
-rw-r--r--xlators/cluster/stripe/src/stripe.c3099
-rw-r--r--xlators/cluster/stripe/src/stripe.h163
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/error-gen/src/Makefile.am7
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c995
-rw-r--r--xlators/debug/error-gen/src/error-gen.h36
-rw-r--r--xlators/debug/io-stats/src/Makefile.am7
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h21
-rw-r--r--xlators/debug/io-stats/src/io-stats.c833
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c3078
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h43
-rw-r--r--xlators/encryption/crypt/src/crypt.c4498
-rw-r--r--xlators/encryption/crypt/src/crypt.h899
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c69
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am3
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)2
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c571
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c515
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c176
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h46
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c693
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h395
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1477
-rw-r--r--xlators/features/compress/Makefile.am (renamed from xlators/protocol/legacy/lib/Makefile.am)2
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h22
-rw-r--r--xlators/features/compress/src/cdc.c342
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am7
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c24
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1172
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h128
-rw-r--r--xlators/features/glupy/Makefile.am (renamed from xlators/protocol/legacy/server/Makefile.am)2
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/src/Makefile.am20
-rw-r--r--xlators/features/glupy/src/debug-trace.py774
-rw-r--r--xlators/features/glupy/src/glupy.c2470
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/gluster.py841
-rw-r--r--xlators/features/glupy/src/helloworld.py19
-rw-r--r--xlators/features/glupy/src/negative.py92
-rw-r--r--xlators/features/index/Makefile.am3
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c1489
-rw-r--r--xlators/features/index/src/index.h73
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c424
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c332
-rw-r--r--xlators/features/locks/src/common.h97
-rw-r--r--xlators/features/locks/src/entrylk.c232
-rw-r--r--xlators/features/locks/src/inodelk.c391
-rw-r--r--xlators/features/locks/src/locks-mem-types.h21
-rw-r--r--xlators/features/locks/src/locks.h75
-rw-r--r--xlators/features/locks/src/posix.c1323
-rw-r--r--xlators/features/locks/src/reservelk.c51
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/src/Makefile.am7
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c52
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am8
-rw-r--r--xlators/features/marker/src/marker-common.c37
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h24
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c52
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h27
-rw-r--r--xlators/features/marker/src/marker-quota.c267
-rw-r--r--xlators/features/marker/src/marker-quota.h50
-rw-r--r--xlators/features/marker/src/marker.c783
-rw-r--r--xlators/features/marker/src/marker.h49
-rw-r--r--xlators/features/marker/utils/Makefile.am3
-rw-r--r--xlators/features/marker/utils/src/Makefile.am22
-rw-r--r--xlators/features/marker/utils/src/gsyncd.c346
-rw-r--r--xlators/features/marker/utils/src/procdiggy.c124
-rw-r--r--xlators/features/marker/utils/src/procdiggy.h26
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am6
-rw-r--r--xlators/features/marker/utils/syncdaemon/README.md81
-rw-r--r--xlators/features/marker/utils/syncdaemon/__codecheck.py46
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py224
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py20
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py369
-rw-r--r--xlators/features/marker/utils/syncdaemon/libcxattr.py72
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py518
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py123
-rw-r--r--xlators/features/marker/utils/syncdaemon/repce.py225
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py837
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py269
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h20
-rw-r--r--xlators/features/path-convertor/src/path.c29
-rw-r--r--xlators/features/protect/Makefile.am (renamed from xlators/protocol/legacy/client/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c215
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c397
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/src/Makefile.am7
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c661
-rw-r--r--xlators/features/quiesce/src/quiesce.h23
-rw-r--r--xlators/features/quota/src/Makefile.am7
-rw-r--r--xlators/features/quota/src/quota-mem-types.h23
-rw-r--r--xlators/features/quota/src/quota.c946
-rw-r--r--xlators/features/quota/src/quota.h52
-rw-r--r--xlators/features/read-only/src/Makefile.am9
-rw-r--r--xlators/features/read-only/src/read-only-common.c133
-rw-r--r--xlators/features/read-only/src/read-only-common.h76
-rw-r--r--xlators/features/read-only/src/read-only.c22
-rw-r--r--xlators/features/read-only/src/worm.c36
-rw-r--r--xlators/features/trash/src/Makefile.am7
-rw-r--r--xlators/features/trash/src/trash-mem-types.h23
-rw-r--r--xlators/features/trash/src/trash.c109
-rw-r--r--xlators/features/trash/src/trash.h22
-rw-r--r--xlators/lib/src/libxlator.c479
-rw-r--r--xlators/lib/src/libxlator.h104
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h20
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c22
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am55
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1115
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c3451
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c2527
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c1116
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c531
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h89
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c637
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c118
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h32
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c924
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c1893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c94
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c3881
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h107
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c140
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c120
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1052
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c1466
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1193
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c148
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h60
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c5590
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c2798
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h158
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1639
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h71
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c6059
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h367
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1593
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h122
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c1561
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c820
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h643
-rw-r--r--xlators/mount/fuse/src/Makefile.am22
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c3105
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h320
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c427
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h22
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c943
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in399
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in3
-rw-r--r--xlators/nfs/server/src/Makefile.am23
-rw-r--r--xlators/nfs/server/src/acl3.c708
-rw-r--r--xlators/nfs/server/src/acl3.h31
-rw-r--r--xlators/nfs/server/src/mount3.c1144
-rw-r--r--xlators/nfs/server/src/mount3.h39
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c211
-rw-r--r--xlators/nfs/server/src/nfs-common.h27
-rw-r--r--xlators/nfs/server/src/nfs-fops.c442
-rw-r--r--xlators/nfs/server/src/nfs-fops.h41
-rw-r--r--xlators/nfs/server/src/nfs-generics.c43
-rw-r--r--xlators/nfs/server/src/nfs-generics.h30
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c73
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h19
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h30
-rw-r--r--xlators/nfs/server/src/nfs.c964
-rw-r--r--xlators/nfs/server/src/nfs.h52
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c183
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h55
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c2731
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h39
-rw-r--r--xlators/nfs/server/src/nfs3.c1026
-rw-r--r--xlators/nfs/server/src/nfs3.h126
-rw-r--r--xlators/nfs/server/src/nlm4.c2525
-rw-r--r--xlators/nfs/server/src/nlm4.h77
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c117
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am8
-rw-r--r--xlators/performance/io-cache/src/io-cache.c492
-rw-r--r--xlators/performance/io-cache/src/io-cache.h30
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c22
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h20
-rw-r--r--xlators/performance/io-cache/src/page.c74
-rw-r--r--xlators/performance/io-threads/src/Makefile.am7
-rw-r--r--xlators/performance/io-threads/src/io-threads.c1020
-rw-r--r--xlators/performance/io-threads/src/io-threads.h33
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h21
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am25
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2303
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1001
-rw-r--r--xlators/performance/quick-read/src/Makefile.am7
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h22
-rw-r--r--xlators/performance/quick-read/src/quick-read.c3558
-rw-r--r--xlators/performance/quick-read/src/quick-read.h61
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/read-ahead/src/page.c66
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h20
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c489
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h23
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h36
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c4259
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h106
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c56
-rw-r--r--xlators/performance/write-behind/src/Makefile.am7
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h23
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3610
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am11
-rw-r--r--xlators/protocol/auth/addr/src/addr.c36
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am7
-rw-r--r--xlators/protocol/auth/login/src/login.c22
-rw-r--r--xlators/protocol/client/src/Makefile.am13
-rw-r--r--xlators/protocol/client/src/client-callback.c25
-rw-r--r--xlators/protocol/client/src/client-handshake.c996
-rw-r--r--xlators/protocol/client/src/client-helpers.c154
-rw-r--r--xlators/protocol/client/src/client-lk.c413
-rw-r--r--xlators/protocol/client/src/client-mem-types.h23
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c (renamed from xlators/protocol/client/src/client3_1-fops.c)3541
-rw-r--r--xlators/protocol/client/src/client.c810
-rw-r--r--xlators/protocol/client/src/client.h152
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am27
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am20
-rw-r--r--xlators/protocol/server/src/authenticate.c109
-rw-r--r--xlators/protocol/server/src/authenticate.h19
-rw-r--r--xlators/protocol/server/src/server-handshake.c208
-rw-r--r--xlators/protocol/server/src/server-helpers.c1349
-rw-r--r--xlators/protocol/server/src/server-helpers.h68
-rw-r--r--xlators/protocol/server/src/server-mem-types.h20
-rw-r--r--xlators/protocol/server/src/server-resolve.c440
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6179
-rw-r--r--xlators/protocol/server/src/server.c764
-rw-r--r--xlators/protocol/server/src/server.h131
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c5236
-rw-r--r--xlators/storage/Makefile.am6
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c527
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c783
-rw-r--r--xlators/storage/bd/src/bd.c2404
-rw-r--r--xlators/storage/bd/src/bd.h178
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c744
-rw-r--r--xlators/storage/posix/src/posix-handle.h143
-rw-r--r--xlators/storage/posix/src/posix-helpers.c891
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c2782
-rw-r--r--xlators/storage/posix/src/posix.h129
-rw-r--r--xlators/system/posix-acl/src/Makefile.am10
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h44
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c457
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h71
455 files changed, 129742 insertions, 85029 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index b1643d26c..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
CLEANFILES =
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
index c0b9141c6..90370d861 100644
--- a/xlators/bindings/python/src/Makefile.am
+++ b/xlators/bindings/python/src/Makefile.am
@@ -9,7 +9,7 @@ pythondir = $(xlatordir)/python
python_so_SOURCES = python.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+AM_CFLAGS = -fPIC $(GF_CPPFLAGS) -Wall \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
$(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
index ee0eb1310..337c983ec 100644
--- a/xlators/bindings/python/src/gluster.py
+++ b/xlators/bindings/python/src/gluster.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
from glusterstack import *
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
index ba24c8165..0c071ae98 100644
--- a/xlators/bindings/python/src/glusterstack.py
+++ b/xlators/bindings/python/src/glusterstack.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
index e9069d07c..98437d22e 100644
--- a/xlators/bindings/python/src/glustertypes.py
+++ b/xlators/bindings/python/src/glustertypes.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
import collections
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
index 3310a2115..9b96790de 100644
--- a/xlators/bindings/python/src/python.c
+++ b/xlators/bindings/python/src/python.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <Python.h>
#ifndef _CONFIG_H
@@ -45,7 +35,7 @@ python_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
+ int32_t count,
off_t offset)
{
python_private_t *priv = (python_private_t *)this->private;
@@ -148,7 +138,7 @@ init (xlator_t *this)
Py_InitializeEx(0);
if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
+ gf_log ("python", GF_LOG_ERROR,
"FATAL: python should have exactly one child");
return -1;
}
@@ -166,7 +156,7 @@ init (xlator_t *this)
}
priv->pInterp = Py_NewInterpreter();
-
+
// Adjust python's path
PyObject *syspath = PySys_GetObject("path");
PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
@@ -188,7 +178,7 @@ init (xlator_t *this)
priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
+
priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
if (!priv->pScriptModule || PyErr_Occurred())
{
@@ -217,7 +207,7 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
python_private_t *priv = (python_private_t*)(this->private);
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
index 507455c85..59a991dca 100644
--- a/xlators/bindings/python/src/testxlator.py
+++ b/xlators/bindings/python/src/testxlator.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+"""
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
"""
This is a test translator written in python.
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 16ed25af1..35d18a6c0 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,21 +1,31 @@
xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c $(top_builddir)/xlators/lib/src/libxlator.c
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
+ afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
+ afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
+afr_la_LDFLAGS = -module -avoid-version
afr_la_SOURCES = $(afr_common_source) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-pump_la_LDFLAGS = -module -avoidversion
+pump_la_LDFLAGS = -module -avoid-version
pump_la_SOURCES = $(afr_common_source) pump.c
pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h $(top_builddir)/glusterfsd/src/glusterfsd.h
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
+ afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
+ afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
+ $(top_builddir)/glusterfsd/src/glusterfsd.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 22b30fdfa..af01f2ef2 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -58,10 +49,9 @@
#include "afr-self-heald.h"
#include "pump.h"
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL
#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-
+#define AFR_STATISTICS_HISTORY_SIZE 50
int
afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
gf_boolean_t fail_conflict);
@@ -92,6 +82,11 @@ afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
path, priv->pending_key[i]);
/* 3 = data+metadata+entry */
}
+ ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
+ "lookup", path);
+ }
}
int
@@ -123,6 +118,13 @@ afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
loc->path, GLUSTERFS_ENTRYLK_COUNT);
}
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
+ }
+
ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -200,60 +202,86 @@ out:
return ret;
}
-afr_inode_ctx_t*
-afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)
+void
+afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)
{
- int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
- size_t size = 0;
+ if (!ctx)
+ return;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+}
- GF_ASSERT (child_count > 0);
+afr_inode_ctx_t*
+__afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
- if (!addr) {
- ctx = GF_CALLOC (1, sizeof (*ctx),
- gf_afr_mt_inode_ctx_t);
- if (!ctx)
- goto out;
- size = sizeof (*ctx->fresh_children);
- ctx->fresh_children = GF_CALLOC (child_count, size,
- gf_afr_mt_int32_t);
- if (!ctx->fresh_children)
- goto out;
- } else {
- ctx = (afr_inode_ctx_t*) (long) addr;
+ priv = this->private;
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (afr_inode_ctx_t*) (long) ctx_addr;
+ goto out;
}
- ret = 0;
+ ctx = GF_CALLOC (1, sizeof (*ctx),
+ gf_afr_mt_inode_ctx_t);
+ if (!ctx)
+ goto fail;
+ ctx->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*ctx->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!ctx->fresh_children)
+ goto fail;
+ ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
+ if (ret) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa (inode->gfid));
+ goto fail;
+ }
+
out:
- if (ret && ctx) {
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
- ctx = NULL;
+ return ctx;
+
+fail:
+ afr_inode_ctx_destroy (ctx);
+ return NULL;
+}
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
}
+ UNLOCK (&inode->lock);
return ctx;
}
void
-afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+afr_inode_get_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
{
GF_ASSERT (inode);
GF_ASSERT (params);
- int ret = 0;
afr_inode_ctx_t *ctx = NULL;
afr_private_t *priv = NULL;
int i = 0;
- uint64_t ctx_addr = 0;
int32_t read_child = -1;
int32_t *fresh_children = NULL;
priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- goto unlock;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
+ ctx = __afr_inode_ctx_get (inode, this);
if (!ctx)
goto unlock;
switch (params->op) {
@@ -272,12 +300,6 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
params->u.value = _gf_true;
break;
- case AFR_INODE_GET_SPLIT_BRAIN:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK)
- params->u.value = _gf_true;
- ;
- break;
default:
GF_ASSERT (0);
break;
@@ -290,11 +312,16 @@ unlock:
gf_boolean_t
afr_is_split_brain (xlator_t *this, inode_t *inode)
{
- afr_inode_params_t params = {0};
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t spb = _gf_false;
- params.op = AFR_INODE_GET_SPLIT_BRAIN;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+ ctx = afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto out;
+ if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB))
+ spb = _gf_true;
+out:
+ return spb;
}
gf_boolean_t
@@ -303,11 +330,10 @@ afr_is_opendir_done (xlator_t *this, inode_t *inode)
afr_inode_params_t params = {0};
params.op = AFR_INODE_GET_OPENDIR_DONE;
- afr_inode_get_ctx (this, inode, &params);
+ afr_inode_get_ctx_params (this, inode, &params);
return params.u.value;
}
-
int32_t
afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
{
@@ -315,7 +341,7 @@ afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
params.op = AFR_INODE_GET_READ_CTX;
params.u.read_ctx.children = fresh_children;
- afr_inode_get_ctx (this, inode, &params);
+ afr_inode_get_ctx_params (this, inode, &params);
return params.u.read_ctx.read_child;
}
@@ -377,31 +403,14 @@ afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
}
void
-afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
-{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
-
- if (set) {
- remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = remaining_mask | mask;
- } else {
- ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- }
-}
-
-void
-afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+afr_inode_set_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
{
GF_ASSERT (inode);
GF_ASSERT (params);
- int ret = 0;
afr_inode_ctx_t *ctx = NULL;
afr_private_t *priv = NULL;
- uint64_t ctx_addr = 0;
- gf_boolean_t set = _gf_false;
int32_t read_child = -1;
int32_t *fresh_children = NULL;
int32_t *stale_children = NULL;
@@ -409,10 +418,7 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- ctx_addr = 0;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
+ ctx = __afr_inode_ctx_get (inode, this);
if (!ctx)
goto unlock;
switch (params->op) {
@@ -432,33 +438,26 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
case AFR_INODE_SET_OPENDIR_DONE:
afr_inode_ctx_set_opendir_done (ctx);
break;
- case AFR_INODE_SET_SPLIT_BRAIN:
- set = params->u.value;
- afr_inode_ctx_set_splitbrain (ctx, set);
- break;
default:
GF_ASSERT (0);
break;
}
- ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
- "set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
}
unlock:
UNLOCK (&inode->lock);
}
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb)
{
- afr_inode_params_t params = {0};
+ afr_inode_ctx_t *ctx = NULL;
- params.op = AFR_INODE_SET_SPLIT_BRAIN;
- params.u.value = set;
- afr_inode_set_ctx (this, inode, &params);
+ ctx = afr_inode_ctx_get (inode, this);
+ if (mdata_spb != DONT_KNOW)
+ ctx->mdata_spb = mdata_spb;
+ if (data_spb != DONT_KNOW)
+ ctx->data_spb = data_spb;
}
void
@@ -467,7 +466,7 @@ afr_set_opendir_done (xlator_t *this, inode_t *inode)
afr_inode_params_t params = {0};
params.op = AFR_INODE_SET_OPENDIR_DONE;
- afr_inode_set_ctx (this, inode, &params);
+ afr_inode_set_ctx_params (this, inode, &params);
}
void
@@ -486,7 +485,7 @@ afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
params.op = AFR_INODE_SET_READ_CTX;
params.u.read_ctx.read_child = read_child;
params.u.read_ctx.children = fresh_children;
- afr_inode_set_ctx (this, inode, &params);
+ afr_inode_set_ctx_params (this, inode, &params);
}
void
@@ -499,7 +498,7 @@ afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
params.op = AFR_INODE_RM_STALE_CHILDREN;
params.u.read_ctx.children = stale_children;
- afr_inode_set_ctx (this, inode, &params);
+ afr_inode_set_ctx_params (this, inode, &params);
}
gf_boolean_t
@@ -543,6 +542,10 @@ afr_is_read_child (int32_t *success_children, int32_t *sources,
gf_boolean_t success_child = _gf_false;
gf_boolean_t source = _gf_false;
+ if (child < 0) {
+ return _gf_false;
+ }
+
GF_ASSERT (success_children);
GF_ASSERT (child_count > 0);
@@ -559,29 +562,69 @@ out:
return (success_child && source);
}
+int32_t
+afr_hash_child (int32_t *success_children, int32_t child_count,
+ unsigned int hmode, uuid_t gfid)
+{
+ uuid_t gfid_copy = {0,};
+ pid_t pid;
+
+ if (!hmode) {
+ return -1;
+ }
+
+ if (gfid) {
+ uuid_copy(gfid_copy,gfid);
+ }
+ if (hmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients won't converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
+ }
+
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
+
/* If sources is NULL the xattrs are assumed to be of source for all
* success_children.
*/
int
-afr_select_read_child_from_policy (int32_t *success_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources)
+afr_select_read_child_from_policy (int32_t *success_children,
+ int32_t child_count, int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid)
{
int32_t read_child = -1;
int i = 0;
GF_ASSERT (success_children);
- read_child = prev_read_child;
+ read_child = config_read_child;
if (afr_is_read_child (success_children, sources, child_count,
read_child))
goto out;
- read_child = config_read_child;
+ read_child = prev_read_child;
if (afr_is_read_child (success_children, sources, child_count,
read_child))
goto out;
+ read_child = afr_hash_child (success_children, child_count,
+ hmode, gfid);
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child)) {
+ goto out;
+ }
+
for (i = 0; i < child_count; i++) {
read_child = success_children[i];
if (read_child < 0)
@@ -601,7 +644,7 @@ out:
void
afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child)
+ int32_t config_read_child, uuid_t gfid)
{
int read_child = -1;
afr_private_t *priv = NULL;
@@ -611,7 +654,8 @@ afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
priv->child_count,
prev_read_child,
config_read_child,
- NULL);
+ NULL,
+ priv->hash_mode, gfid);
if (read_child >= 0)
afr_inode_set_read_ctx (this, inode, read_child,
fresh_children);
@@ -667,8 +711,11 @@ afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
GF_ASSERT (call_child);
GF_ASSERT (last_index);
GF_ASSERT (fresh_children);
- GF_ASSERT (read_child >= 0);
+ if (read_child < 0) {
+ ret = -EIO;
+ goto out;
+ }
priv = this->private;
*call_child = -1;
*last_index = -1;
@@ -717,81 +764,66 @@ out:
}
void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
+{
+ afr_reset_xattr (xattr, child_count);
+ GF_FREE (xattr);
+}
+
+void
afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
{
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int i = 0;
-
sh = &local->self_heal;
priv = this->private;
- if (sh->buf)
- GF_FREE (sh->buf);
+ if (sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+ GF_FREE (sh->data_sh_info);
+
+ if (sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+ GF_FREE (sh->metadata_sh_info);
- if (sh->parentbufs)
- GF_FREE (sh->parentbufs);
+ GF_FREE (sh->buf);
+
+ GF_FREE (sh->parentbufs);
if (sh->inode)
inode_unref (sh->inode);
- if (sh->xattr) {
- afr_reset_xattr (sh->xattr, priv->child_count);
- GF_FREE (sh->xattr);
- }
+ afr_xattr_array_destroy (sh->xattr, priv->child_count);
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ GF_FREE (sh->child_errno);
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->pending_matrix[i]);
- }
- GF_FREE (sh->pending_matrix);
- }
+ afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
+ afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->delta_matrix[i]);
- }
- GF_FREE (sh->delta_matrix);
- }
+ GF_FREE (sh->sources);
- if (sh->sources)
- GF_FREE (sh->sources);
+ GF_FREE (sh->success);
- if (sh->success)
- GF_FREE (sh->success);
-
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
+ GF_FREE (sh->locked_nodes);
if (sh->healing_fd) {
fd_unref (sh->healing_fd);
sh->healing_fd = NULL;
}
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
+ GF_FREE ((char *)sh->linkname);
- if (sh->success_children)
- GF_FREE (sh->success_children);
+ GF_FREE (sh->success_children);
- if (sh->fresh_children)
- GF_FREE (sh->fresh_children);
+ GF_FREE (sh->fresh_children);
- if (sh->fresh_parent_dirs)
- GF_FREE (sh->fresh_parent_dirs);
+ GF_FREE (sh->fresh_parent_dirs);
loc_wipe (&sh->parent_loc);
loc_wipe (&sh->lookup_loc);
- if (sh->checksum)
- GF_FREE (sh->checksum);
+ GF_FREE (sh->checksum);
- if (sh->write_needed)
- GF_FREE (sh->write_needed);
+ GF_FREE (sh->write_needed);
if (sh->healing_fd)
fd_unref (sh->healing_fd);
}
@@ -800,34 +832,26 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
{
- int i = 0;
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- GF_FREE (local->pending[i]);
- }
-
- GF_FREE (local->pending);
-
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
+ afr_matrix_cleanup (local->pending, priv->child_count);
+ afr_matrix_cleanup (local->transaction.txn_changelog,
+ priv->child_count);
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
+ GF_FREE (local->internal_lock.locked_nodes);
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+ afr_entry_lockee_cleanup (&local->internal_lock);
GF_FREE (local->transaction.pre_op);
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->child_errno);
GF_FREE (local->transaction.eager_lock);
GF_FREE (local->transaction.basename);
@@ -835,6 +859,8 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
loc_wipe (&local->transaction.parent_loc);
loc_wipe (&local->transaction.new_parent_loc);
+
+ GF_FREE (local->transaction.postop_piggybacked);
}
@@ -861,14 +887,16 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->xattr_req)
dict_unref (local->xattr_req);
- if (local->child_up)
- GF_FREE (local->child_up);
+ if (local->dict)
+ dict_unref (local->dict);
+
+ GF_FREE(local->replies);
+
+ GF_FREE (local->child_up);
- if (local->fresh_children)
- GF_FREE (local->fresh_children);
+ GF_FREE (local->child_errno);
- if (local->fd_open_on)
- GF_FREE (local->fd_open_on);
+ GF_FREE (local->fresh_children);
{ /* lookup */
if (local->cont.lookup.xattrs) {
@@ -886,27 +914,23 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
inode_unref (local->cont.lookup.inode);
}
- if (local->cont.lookup.postparents)
- GF_FREE (local->cont.lookup.postparents);
+ GF_FREE (local->cont.lookup.postparents);
- if (local->cont.lookup.bufs)
- GF_FREE (local->cont.lookup.bufs);
+ GF_FREE (local->cont.lookup.bufs);
- if (local->cont.lookup.success_children)
- GF_FREE (local->cont.lookup.success_children);
+ GF_FREE (local->cont.lookup.success_children);
- if (local->cont.lookup.sources)
- GF_FREE (local->cont.lookup.sources);
+ GF_FREE (local->cont.lookup.sources);
+ afr_matrix_cleanup (local->cont.lookup.pending_matrix,
+ priv->child_count);
}
{ /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
+ GF_FREE (local->cont.getxattr.name);
}
{ /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
+ GF_FREE (local->cont.lk.locked_nodes);
}
{ /* create */
@@ -940,18 +964,40 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
dict_unref (local->cont.setxattr.dict);
}
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref (local->cont.fsetxattr.dict);
+ }
+
{ /* removexattr */
GF_FREE (local->cont.removexattr.name);
}
-
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref (local->cont.xattrop.xattr);
+ }
+ { /* fxattrop */
+ if (local->cont.fxattrop.xattr)
+ dict_unref (local->cont.fxattrop.xattr);
+ }
{ /* symlink */
GF_FREE (local->cont.symlink.linkpath);
}
{ /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
+ GF_FREE (local->cont.opendir.checksum);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref (local->cont.readdir.dict);
}
+
+ if (local->xdata_req)
+ dict_unref (local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
}
@@ -1034,34 +1080,144 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
uuid_copy (loc->pargfid, postparent->ia_gfid);
}
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
+
+static void
+afr_handle_quota_size (afr_local_t *local, xlator_t *this,
+ dict_t *rsp_dict)
+{
+ int32_t *sources = NULL;
+ dict_t *xattr = NULL;
+ data_t *max_data = NULL;
+ int64_t max_quota_size = -1;
+ data_t *data = NULL;
+ int64_t *size = NULL;
+ int64_t quota_size = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t source_present = _gf_false;
+
+ priv = this->private;
+ sources = local->cont.lookup.sources;
+
+ if (rsp_dict == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "%s: Invalid "
+ "response dictionary", local->loc.path);
+ return;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source_present = _gf_true;
+ break;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ /*
+ * If there is at least one source lets check
+ * for maximum quota sizes among sources, otherwise take the
+ * maximum of the ones present to be on the safer side.
+ */
+ if (source_present && !sources[i])
+ continue;
+
+ xattr = local->cont.lookup.xattrs[i];
+ if (!xattr)
+ continue;
+
+ data = dict_get (xattr, QUOTA_SIZE_KEY);
+ if (!data)
+ continue;
+
+ size = (int64_t*)data->data;
+ quota_size = ntoh64(*size);
+ gf_log (this->name, GF_LOG_DEBUG, "%s: %d, size: %"PRId64,
+ local->loc.path, i, quota_size);
+ if (quota_size > max_quota_size) {
+ if (max_data)
+ data_unref (max_data);
+
+ max_quota_size = quota_size;
+ max_data = data_ref (data);
+ }
+ }
+
+ if (max_data) {
+ ret = dict_set (rsp_dict, QUOTA_SIZE_KEY, max_data);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "quota size", local->loc.path);
+ }
+
+ data_unref (max_data);
+ }
+}
+
int
afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
{
- int32_t read_child = -1;
struct iatt *buf = NULL;
struct iatt *postparent = NULL;
dict_t **xattr = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
int ret = 0;
+ int i = 0;
GF_ASSERT (local);
buf = &local->cont.lookup.buf;
postparent = &local->cont.lookup.postparent;
xattr = &local->cont.lookup.xattr;
+ priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
- NULL);
+ local->fresh_children);
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+ success_children = local->cont.lookup.success_children;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ afr_children_intersection_get (local->fresh_children, success_children,
+ sources, priv->child_count);
+ if (!sources[read_child]) {
+ read_child = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ read_child = i;
+ break;
+ }
+ }
+ }
if (read_child < 0) {
ret = -1;
goto out;
}
+
gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
read_child);
if (!*xattr)
*xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
+
*buf = local->cont.lookup.bufs[read_child];
*postparent = local->cont.lookup.postparents[read_child];
+ if (dict_get (local->xattr_req, QUOTA_SIZE_KEY))
+ afr_handle_quota_size (local, this, *xattr);
+
if (IA_INVAL == local->cont.lookup.inode->ia_type) {
/* fix for RT #602 */
local->cont.lookup.inode->ia_type = buf->ia_type;
@@ -1077,6 +1233,7 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
uint32_t inodelk_count = 0;
uint32_t entrylk_count = 0;
int ret = -1;
+ uint32_t parent_entrylk = 0;
GF_ASSERT (local);
GF_ASSERT (this);
@@ -1092,43 +1249,103 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
&entrylk_count);
if (ret == 0)
local->entrylk_count += entrylk_count;
+ ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
+ &parent_entrylk);
+ if (!ret)
+ local->cont.lookup.parent_entrylk += parent_entrylk;
}
+/*
+ * It's important to maintain a commutative property on do_*_self_heal and
+ * found*; once set, they must not be cleared by a subsequent iteration or
+ * call, so that they represent a logical OR of all iterations and calls
+ * regardless of child/key order. That allows the caller to call us multiple
+ * times without having to use a separate variable as a "reduce" accumulator.
+ */
static void
afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
dict_t *xattr)
{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ void *pending_raw = NULL;
+ int32_t *pending = NULL;
+
GF_ASSERT (local);
GF_ASSERT (this);
GF_ASSERT (xattr);
- if (afr_sh_has_metadata_pending (xattr, this)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
+ priv = this->private;
- if (afr_sh_has_entry_pending (xattr, this)) {
- local->self_heal.do_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xattr, priv->pending_key[i],
+ &pending_raw);
+ if (ret != 0) {
+ continue;
+ }
+ pending = pending_raw;
- if (afr_sh_has_data_pending (xattr, this)) {
- local->self_heal.do_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
+ if (pending[AFR_METADATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "metadata self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_ENTRY_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entry self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_entry_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_DATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "data self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
}
}
+void
+afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
+{
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t subvol_status = 0;
+ int32_t *success_children = NULL;
+ dict_t **xattrs = NULL;
+ struct iatt *bufs = NULL;
+ int32_t **pending_matrix = NULL;
+
+ priv = this->private;
+
+ sources = GF_CALLOC (priv->child_count, sizeof (*sources),
+ gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ success_children = local->cont.lookup.success_children;
+ xattrs = local->cont.lookup.xattrs;
+ bufs = local->cont.lookup.bufs;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ afr_build_sources (this, xattrs, bufs, pending_matrix,
+ sources, success_children, AFR_METADATA_TRANSACTION,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ local->cont.lookup.possible_spb = _gf_true;
+out:
+ GF_FREE (sources);
+}
+
static void
afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
struct iatt *buf, struct iatt *lookup_buf)
{
if (PERMISSION_DIFFERS (buf, lookup_buf)) {
/* mismatching permissions */
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"permissions differ for %s ", local->loc.path);
local->self_heal.do_metadata_self_heal = _gf_true;
}
@@ -1136,27 +1353,45 @@ afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
/* mismatching permissions */
local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"ownership differs for %s ", local->loc.path);
}
if (SIZE_DIFFERS (buf, lookup_buf)
&& IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"size differs for %s ", local->loc.path);
local->self_heal.do_data_self_heal = _gf_true;
}
if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
/* mismatching gfid */
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s: gfid different on subvolume", local->loc.path);
}
}
static void
-afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
- gf_boolean_t split_brain)
+afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
+{
+ gf_boolean_t split_brain = _gf_false;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
+ if ((local->success_count > 0) && split_brain &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ sh->force_confirm_spb = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
{
GF_ASSERT (local);
GF_ASSERT (this);
@@ -1167,24 +1402,11 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
local->self_heal.do_entry_self_heal = _gf_true;
local->self_heal.do_gfid_self_heal = _gf_true;
local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
+ gf_log(this->name, GF_LOG_DEBUG,
"entries are missing in lookup of %s.",
local->loc.path);
- //If all self-heals are needed no need to check for other rules
- goto out;
}
- if ((local->success_count > 0) && split_brain &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
-
-out:
return;
}
@@ -1194,6 +1416,8 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
GF_ASSERT (sh);
GF_ASSERT (priv);
+ if (sh->force_confirm_spb)
+ return _gf_true;
return (sh->do_gfid_self_heal
|| sh->do_missing_entry_self_heal
|| (afr_data_self_heal_enabled (priv->data_self_heal) &&
@@ -1227,6 +1451,7 @@ afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
dict_t **xattrs = NULL;
int32_t *success_children = NULL;
afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ uuid_t *gfid = NULL;
GF_ASSERT (local);
GF_ASSERT (this);
@@ -1240,8 +1465,9 @@ afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
type = afr_transaction_type_get (ia_type);
xattrs = local->cont.lookup.xattrs;
+ gfid = &local->cont.lookup.buf.ia_gfid;
source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
- type);
+ type, *gfid);
if (source < 0) {
gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
"for %s", local->loc.path);
@@ -1269,7 +1495,8 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
xlator_t *this),
int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno))
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed))
{
afr_local_t *local = NULL;
char sh_type_str[256] = {0,};
@@ -1292,7 +1519,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
if (background)
bg = "background";
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s %s self-heal triggered. path: %s, reason: %s", bg,
sh_type_str, local->loc.path, reason);
@@ -1363,7 +1590,7 @@ afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
child2 = &bufs[success_children[i-1]];
if (FILETYPE_DIFFERS (child1, child2)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype "
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: filetype "
"differs on subvolumes (%d, %d)", path,
success_children[i-1], success_children[i]);
conflicting = _gf_true;
@@ -1372,7 +1599,7 @@ afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
if (!gfid || uuid_is_null (child1->ia_gfid))
continue;
if (uuid_compare (*gfid, child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs"
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid differs"
" on subvolume %d", path, success_children[i]);
conflicting = _gf_true;
goto out;
@@ -1455,13 +1682,11 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
int32_t child1 = -1;
int32_t child2 = -1;
afr_self_heal_t *sh = NULL;
- gf_boolean_t split_brain = _gf_false;
priv = this->private;
sh = &local->self_heal;
- split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
- afr_detect_self_heal_by_lookup_status (local, this, split_brain);
+ afr_detect_self_heal_by_lookup_status (local, this);
if (afr_lookup_gfid_missing_count (local, this))
local->self_heal.do_gfid_self_heal = _gf_true;
@@ -1488,23 +1713,28 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
afr_lookup_set_self_heal_params_by_xattr (local, this,
xattr[child1]);
}
- if (afr_open_only_data_self_heal (priv->data_self_heal)
- && !split_brain)
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
sh->do_data_self_heal = _gf_false;
+ if (sh->do_metadata_self_heal)
+ afr_lookup_check_set_metadata_split_brain (local, this);
+ afr_detect_self_heal_by_split_brain_status (local, this);
}
int
afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed)
{
afr_local_t *local = NULL;
+ int ret = -1;
+ dict_t *xattr = NULL;
local = frame->local;
if (op_ret == -1) {
local->op_ret = -1;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local->op_errno = afr_most_important_error(local->op_errno,
+ op_errno, _gf_true);
goto out;
} else {
@@ -1512,6 +1742,23 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
}
afr_lookup_done_success_action (frame, this, _gf_true);
+ xattr = local->cont.lookup.xattr;
+ if (xattr) {
+ ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "sh-failed to %d", local->loc.path, sh_failed);
+
+ if (local->self_heal.actual_sh_started == _gf_true &&
+ sh_failed == 0) {
+ ret = dict_set_int32 (xattr, "actual-sh-done", 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
+ " set actual-sh-done to %d",
+ local->loc.path,
+ local->self_heal.actual_sh_started);
+ }
+ }
out:
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->cont.lookup.inode, &local->cont.lookup.buf,
@@ -1585,7 +1832,8 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
afr_lookup_set_self_heal_params (local, this);
if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_transaction_running (local))
+ if (afr_is_transaction_running (local) &&
+ (!local->allow_sh_for_running_transaction))
goto out;
reason = "lookup detected pending operations";
@@ -1646,26 +1894,23 @@ afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
int32_t read_child = -1;
int32_t ret = -1;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ gf_boolean_t fresh_lookup = _gf_false;
local = frame->local;
- priv = this->private;
+ fresh_lookup = local->cont.lookup.fresh_lookup;
if (local->loc.parent == NULL)
fail_conflict = _gf_true;
- if (afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name)) {
+ if (afr_lookup_conflicting_entries (local, this)) {
if (fail_conflict == _gf_false)
ret = 0;
goto out;
}
- if (!afr_is_transaction_running (local)) {
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (ret)
+ ret = afr_lookup_select_read_child (local, this, &read_child);
+ if (!afr_is_transaction_running (local) || fresh_lookup) {
+ if (read_child < 0)
goto out;
ret = afr_lookup_set_read_ctx (local, this, read_child);
@@ -1676,11 +1921,9 @@ afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
ret = afr_lookup_build_response_params (local, this);
if (ret)
goto out;
- if (afr_is_fresh_lookup (&local->loc, this)) {
- afr_update_loc_gfids (&local->loc,
- &local->cont.lookup.buf,
- &local->cont.lookup.postparent);
- }
+ afr_update_loc_gfids (&local->loc,
+ &local->cont.lookup.buf,
+ &local->cont.lookup.postparent);
ret = 0;
out:
@@ -1691,6 +1934,135 @@ out:
return ret;
}
+int
+afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+ int lsubvol = -1;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if (uuid_is_null (bufs[child].ia_gfid))
+ continue;
+ if (lsubvol < 0) {
+ lsubvol = child;
+ } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
+ lsubvol = child;
+ } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
+ (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
+ lsubvol = child;
+ }
+ }
+ return lsubvol;
+}
+
+void
+afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
+ int subvol)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ memcpy (local->fresh_children, success_children,
+ sizeof (*success_children) * priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ child = local->fresh_children[i];
+ if (child == -1)
+ break;
+ if (child == subvol)
+ continue;
+ if (uuid_is_null (bufs[child].ia_gfid) &&
+ (bufs[child].ia_type == bufs[subvol].ia_type))
+ continue;
+ afr_children_rm_child (success_children, child,
+ priv->child_count);
+ local->success_count--;
+ }
+ afr_reset_children (local->fresh_children, priv->child_count);
+}
+
+void
+afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
+{
+ int lsubvol = 0;
+
+ if (!afr_lookup_conflicting_entries (local, this))
+ goto out;
+
+ lsubvol = afr_lookup_get_latest_subvol (local, this);
+ if (lsubvol < 0)
+ goto out;
+ afr_lookup_mark_other_entries_stale (local, this, lsubvol);
+out:
+ return;
+}
+
+gf_boolean_t
+afr_is_entry_possibly_under_creation (afr_local_t *local, xlator_t *this)
+{
+ /*
+ * We need to perform this test in lookup done and treat on going
+ * create/DELETE as ENOENT.
+ * Reason:
+ Multiple clients A, B and C are attempting 'mkdir -p /mnt/a/b/c'
+
+ 1 Client A is in the middle of mkdir(/a). It has acquired lock.
+ It has performed mkdir(/a) on one subvol, and second one is still
+ in progress
+ 2 Client B performs a lookup, sees directory /a on one,
+ ENOENT on the other, succeeds lookup.
+ 3 Client B performs lookup on /a/b on both subvols, both return ENOENT
+ (one subvol because /a/b does not exist, another because /a
+ itself does not exist)
+ 4 Client B proceeds to mkdir /a/b. It obtains entrylk on inode=/a with
+ basename=b on one subvol, but fails on other subvol as /a is yet to
+ be created by Client A.
+ 5 Client A finishes mkdir of /a on other subvol
+ 6 Client C also attempts to create /a/b, lookup returns ENOENT on
+ both subvols.
+ 7 Client C tries to obtain entrylk on on inode=/a with basename=b,
+ obtains on one subvol (where B had failed), and waits for B to unlock
+ on other subvol.
+ 8 Client B finishes mkdir() on one subvol with GFID-1 and completes
+ transaction and unlocks
+ 9 Client C gets the lock on the second subvol, At this stage second
+ subvol already has /a/b created from Client B, but Client C does not
+ check that in the middle of mkdir transaction
+ 10 Client C attempts mkdir /a/b on both subvols. It succeeds on
+ ONLY ONE (where Client B could not get lock because of
+ missing parent /a dir) with GFID-2, and gets EEXIST from ONE subvol.
+ This way we have /a/b in GFID mismatch. One subvol got GFID-1 because
+ Client B performed transaction on only one subvol (because entrylk()
+ could not be obtained on second subvol because of missing parent dir --
+ caused by premature/speculative succeeding of lookup() on /a when locks
+ are detected). Other subvol gets GFID-2 from Client C because while
+ it was waiting for entrylk() on both subvols, Client B was in the
+ middle of creating mkdir() on only one subvol, and Client C does not
+ "expect" this when it is between lock() and pre-op()/op() phase of the
+ transaction.
+ */
+ if (local->cont.lookup.parent_entrylk && local->enoent_count)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+
static void
afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
@@ -1707,8 +2079,18 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
priv = this->private;
local = frame->local;
+ if (afr_is_entry_possibly_under_creation (local, this)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto unwind;
+ }
+
if (local->op_ret < 0)
goto unwind;
+
+ if (local->cont.lookup.parent_entrylk && local->success_count > 1)
+ afr_succeed_lookup_on_latest_iatt (local, this);
+
gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
up_children_count = afr_up_children_count (local->child_up,
priv->child_count);
@@ -1760,25 +2142,20 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ESTALE > ENOENT > others
- *
+ * The hierarchy is ESTALE > EIO > ENOENT > others
*/
-
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno)
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio)
{
- gf_boolean_t ret = _gf_true;
-
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
-
- /* Nothing should overwrite ENOENT, except ESTALE/EIO*/
- else if ((old_errno == ENOENT) && (new_errno != ESTALE)
- && (new_errno != EIO))
- ret = _gf_false;
-
- return ret;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (eio && (old_errno == EIO || new_errno == EIO))
+ return EIO;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+
+ return new_errno;
}
int32_t
@@ -1797,8 +2174,9 @@ afr_resultant_errno_get (int32_t *children,
} else {
child = i;
}
- if (afr_error_more_important (op_errno, child_errno[child]))
- op_errno = child_errno[child];
+ op_errno = afr_most_important_error(op_errno,
+ child_errno[child],
+ _gf_false);
}
return op_errno;
}
@@ -1810,8 +2188,8 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
if (op_errno == ENOENT)
local->enoent_count++;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local->op_errno = afr_most_important_error(local->op_errno, op_errno,
+ _gf_false);
if (local->op_errno == ESTALE) {
local->op_ret = -1;
@@ -1858,12 +2236,79 @@ afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
afr_set_root_inode_on_first_lookup (local, this, inode);
}
+static int32_t
+afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ priv = this->private;
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ child_index = (int32_t)(long)cookie;
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
+ }
+
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+{
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
+
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
+}
+
static void
afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
+ afr_private_t *priv = this->private;
+
if (local->success_count == 0) {
if (local->op_errno != ESTALE) {
local->op_ret = op_ret;
@@ -1876,6 +2321,11 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind
afr_lookup_cache_args (local, child_index, xattr,
buf, postparent);
+
+ if (local->do_discovery && (priv->read_child == (-1))) {
+ afr_attempt_local_discovery(this,child_index);
+ }
+
local->cont.lookup.success_children[local->success_count] = child_index;
local->success_count++;
}
@@ -1923,6 +2373,7 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
struct iatt *iatts = NULL;
int32_t *success_children = NULL;
int32_t *sources = NULL;
+ int32_t **pending_matrix = NULL;
GF_ASSERT (local);
local->cont.lookup.xattrs = GF_CALLOC (child_count,
@@ -1955,6 +2406,11 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
goto out;
local->cont.lookup.sources = sources;
+ pending_matrix = afr_matrix_create (child_count, child_count);
+ if (NULL == pending_matrix)
+ goto out;
+ local->cont.lookup.pending_matrix = pending_matrix;
+
ret = 0;
out:
return ret;
@@ -1972,37 +2428,51 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
int call_count = 0;
uint64_t ctx = 0;
int32_t op_errno = 0;
-
+ int allow_sh = 0;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
local->op_ret = -1;
frame->local = local;
local->fop = GF_FOP_LOOKUP;
- if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
+ loc_copy (&local->loc, loc);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0) {
+ op_errno = EINVAL;
goto out;
}
- loc_copy (&local->loc, loc);
+ if (local->loc.path &&
+ (strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR) == 0)) {
+ op_errno = EPERM;
+ ret = -1;
+ goto out;
+ }
- ret = inode_ctx_get (loc->inode, this, &ctx);
+ ret = inode_ctx_get (local->loc.inode, this, &ctx);
if (ret == 0) {
/* lookup is a revalidate */
local->read_child_index = afr_inode_get_read_ctx (this,
- loc->inode,
- NULL);
+ local->loc.inode,
+ NULL);
} else {
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
+ if (priv->hash_mode) {
+ local->read_child_index = -1;
+ }
+ else {
+ local->read_child_index =
+ (++priv->read_child_rr) %
+ (priv->child_count);
+ }
}
UNLOCK (&priv->read_child_lock);
+ local->cont.lookup.fresh_lookup = _gf_true;
}
local->child_up = memdup (priv->child_up,
@@ -2030,24 +2500,33 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
/* By default assume ENOTCONN. On success it will be set to 0. */
local->op_errno = ENOTCONN;
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, loc,
+ ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
+ &allow_sh);
+ dict_del (xattr_req, "allow-sh-for-running-transaction");
+ local->allow_sh_for_running_transaction = allow_sh;
+
+ ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
&gfid_req);
if (ret) {
local->op_errno = -ret;
goto out;
}
afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
- loc);
+ &local->loc);
local->fop = GF_FOP_LOOKUP;
+ if (priv->choose_local && !priv->did_discovery) {
+ if (gfid_req && __is_root_gfid(gfid_req)) {
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_lookup_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->lookup,
- loc, local->xattr_req);
+ &local->loc, local->xattr_req);
if (!--call_count)
break;
}
@@ -2066,7 +2545,7 @@ out:
/* {{{ open */
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
afr_private_t * priv = NULL;
int ret = -1;
@@ -2078,216 +2557,165 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)
priv = this->private;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
+ ret = __fd_ctx_get (fd, this, &ctx);
- if (ret == 0)
- goto unlock;
+ if (ret == 0)
+ goto out;
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!fd_ctx->opened_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_piggyback) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_acquired) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_acquired) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->locked_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
- INIT_LIST_HEAD (&fd_ctx->paused_calls);
- INIT_LIST_HEAD (&fd_ctx->entries);
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
+ INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx (%p)", fd);
- }
-unlock:
- UNLOCK (&fd->lock);
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx (%p)", fd);
out:
return ret;
}
-/* {{{ flush */
int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ int ret = -1;
- LOCK (&frame->lock);
+ LOCK (&fd->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno);
+ ret = __afr_fd_ctx_set (this, fd);
}
+ UNLOCK (&fd->lock);
- return 0;
+ return ret;
}
+/* {{{ flush */
int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
if (local->success_count == 0) {
local->op_ret = op_ret;
}
local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
}
local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
- if (need_unwind)
- afr_flush_unwind (frame, this);
+ call_count = afr_frame_return (frame);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret,
+ local->op_errno, NULL);
return 0;
}
-
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
- local = frame->local;
priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ local = frame->local;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- local->fd);
-
+ local->fd, NULL);
if (!--call_count)
break;
+
}
}
return 0;
}
-
int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
+ call_stub_t *stub = NULL;
int ret = -1;
int op_errno = 0;
@@ -2297,47 +2725,27 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->op = GF_FOP_FLUSH;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
+ ret = afr_local_init(local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
+ local->fd = fd_ref(fd);
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub) {
+ ret = -1;
+ op_errno = ENOMEM;
goto out;
}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ afr_delayed_changelog_wake_resume (this, fd, stub);
+ ret = 0;
- ret = 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, -1, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -2351,8 +2759,6 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
ret = fd_ctx_get (fd, this, &ctx);
if (ret < 0)
@@ -2361,28 +2767,18 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ GF_FREE (fd_ctx->pre_op_done);
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ GF_FREE (fd_ctx->opened_on);
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ GF_FREE (fd_ctx->locked_on);
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- list_del_init (&paused_call->call_list);
- GF_FREE (paused_call);
- }
+ GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_piggyback);
- if (fd_ctx->lock_piggyback)
- GF_FREE (fd_ctx->lock_piggyback);
+ GF_FREE (fd_ctx->lock_acquired);
- if (fd_ctx->lock_acquired)
- GF_FREE (fd_ctx->lock_acquired);
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
GF_FREE (fd_ctx);
}
@@ -2420,14 +2816,25 @@ afr_release (xlator_t *this, fd_t *fd)
/* {{{ fsync */
int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
int child_index = (long) cookie;
int read_child = 0;
+ call_stub_t *stub = NULL;
local = frame->local;
@@ -2443,13 +2850,13 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = 0;
if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
}
if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
}
local->success_count++;
@@ -2462,9 +2869,32 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf);
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ xdata);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
}
return 0;
@@ -2473,7 +2903,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+ int32_t datasync, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -2488,7 +2918,7 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -2499,13 +2929,17 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->fd = fd_ref (fd);
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
+
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_fsync_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fsync,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
@@ -2514,7 +2948,7 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -2524,7 +2958,8 @@ out:
int32_t
afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2544,7 +2979,7 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2552,7 +2987,7 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+ int32_t datasync, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -2567,7 +3002,7 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -2581,7 +3016,7 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
STACK_WIND (frame, afr_fsyncdir_cbk,
priv->children[i],
priv->children[i]->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
@@ -2590,7 +3025,7 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno);
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
@@ -2601,7 +3036,7 @@ out:
int32_t
afr_xattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2610,8 +3045,11 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.xattrop.xattr)
+ local->cont.xattrop.xattr = dict_ref (xattr);
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -2621,7 +3059,7 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.xattrop.xattr, xdata);
return 0;
}
@@ -2629,7 +3067,7 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -2644,7 +3082,7 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -2658,7 +3096,7 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame, afr_xattrop_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
- loc, optype, xattr);
+ loc, optype, xattr, xdata);
if (!--call_count)
break;
}
@@ -2667,7 +3105,7 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2678,7 +3116,7 @@ out:
int32_t
afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2688,8 +3126,12 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.fxattrop.xattr)
+ local->cont.fxattrop.xattr = dict_ref (xattr);
+
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -2699,7 +3141,7 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.fxattrop.xattr, xdata);
return 0;
}
@@ -2707,7 +3149,7 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -2722,7 +3164,7 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -2736,7 +3178,7 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
STACK_WIND (frame, afr_fxattrop_cbk,
priv->children[i],
priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
+ fd, optype, xattr, xdata);
if (!--call_count)
break;
}
@@ -2745,7 +3187,7 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2754,7 +3196,7 @@ out:
int32_t
afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2775,7 +3217,7 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2783,7 +3225,8 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -2798,7 +3241,7 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -2812,7 +3255,7 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_inodelk_cbk,
priv->children[i],
priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
if (!--call_count)
break;
@@ -2822,14 +3265,15 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (inodelk, frame, -1, op_errno);
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2850,7 +3294,7 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2858,7 +3302,8 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -2873,7 +3318,7 @@ afr_finodelk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -2887,7 +3332,7 @@ afr_finodelk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_finodelk_cbk,
priv->children[i],
priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
if (!--call_count)
break;
@@ -2897,15 +3342,14 @@ afr_finodelk (call_frame_t *frame, xlator_t *this,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (finodelk, frame, -1, op_errno);
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2925,7 +3369,7 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2934,7 +3378,8 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -2949,7 +3394,7 @@ afr_entrylk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -2963,7 +3408,7 @@ afr_entrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_entrylk_cbk,
priv->children[i],
priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
if (!--call_count)
break;
@@ -2973,7 +3418,7 @@ afr_entrylk (call_frame_t *frame, xlator_t *this,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (entrylk, frame, -1, op_errno);
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -2981,7 +3426,7 @@ out:
int32_t
afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -3002,7 +3447,7 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -3011,7 +3456,8 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -3026,7 +3472,7 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -3040,7 +3486,7 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_fentrylk_cbk,
priv->children[i],
priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
if (!--call_count)
break;
@@ -3050,14 +3496,14 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno);
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
afr_statfs_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
+ struct statvfs *statvfs, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = 0;
@@ -3088,7 +3534,7 @@ afr_statfs_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
+ &local->cont.statfs.buf, xdata);
return 0;
}
@@ -3096,7 +3542,7 @@ afr_statfs_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
afr_private_t * priv = NULL;
int child_count = 0;
@@ -3113,7 +3559,7 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
priv = this->private;
child_count = priv->child_count;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -3127,7 +3573,7 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_statfs_cbk,
priv->children[i],
priv->children[i]->fops->statfs,
- loc);
+ loc, xdata);
if (!--call_count)
break;
}
@@ -3136,14 +3582,15 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
int call_count = -1;
@@ -3153,7 +3600,7 @@ afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0)
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock);
+ lock, xdata);
return 0;
}
@@ -3175,7 +3622,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
if (call_count == 0) {
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
return 0;
}
@@ -3189,7 +3636,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->lk,
local->fd, F_SETLK,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, NULL);
if (!--call_count)
break;
@@ -3202,7 +3649,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
int32_t
afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -3237,12 +3684,12 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index],
priv->children[child_index]->fops->lk,
local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, xdata);
} else if (local->op_ret == -1) {
/* all nodes have gone down */
AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
} else {
/* locking has succeeded on all nodes that are up */
@@ -3260,7 +3707,7 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
}
return 0;
@@ -3269,7 +3716,7 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -3283,7 +3730,7 @@ afr_lk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -3307,12 +3754,12 @@ afr_lk (call_frame_t *frame, xlator_t *this,
STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
priv->children[i],
priv->children[i]->fops->lk,
- fd, cmd, flock);
+ fd, cmd, flock, xdata);
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -3328,8 +3775,7 @@ afr_forget (xlator_t *this, inode_t *inode)
goto out;
ctx = (afr_inode_ctx_t *)(long)ctx_addr;
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx->fresh_children);
GF_FREE (ctx);
out:
return 0;
@@ -3396,7 +3842,7 @@ find_child_index (xlator_t *this, xlator_t *child)
int32_t
afr_notify (xlator_t *this, int32_t event,
- void *data, ...)
+ void *data, void *data2)
{
afr_private_t *priv = NULL;
int i = -1;
@@ -3409,12 +3855,22 @@ afr_notify (xlator_t *this, int32_t event,
int ret = -1;
int call_psh = 0;
int up_child = AFR_ALL_CHILDREN;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
priv = this->private;
if (!priv)
return 0;
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
had_heard_from_all = 1;
for (i = 0; i < priv->child_count; i++) {
if (!priv->last_event[i]) {
@@ -3442,9 +3898,19 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_CHILD_UP:
LOCK (&priv->lock);
{
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->up_count++;
+ }
priv->child_up[idx] = 1;
- priv->up_count++;
+ call_psh = 1;
+ up_child = idx;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 1)
up_children++;
@@ -3454,12 +3920,6 @@ afr_notify (xlator_t *this, int32_t event,
"going online.", ((xlator_t *)data)->name);
} else {
event = GF_EVENT_CHILD_MODIFIED;
- gf_log (this->name, GF_LOG_INFO, "subvol %d came up, "
- "start crawl", idx);
- if (had_heard_from_all) {
- call_psh = 1;
- up_child = idx;
- }
}
priv->last_event[idx] = event;
@@ -3471,8 +3931,22 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_CHILD_DOWN:
LOCK (&priv->lock);
{
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->down_count++;
+ }
priv->child_up[idx] = 0;
- priv->down_count++;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 0)
@@ -3500,10 +3974,11 @@ afr_notify (xlator_t *this, int32_t event,
break;
- case GF_EVENT_TRIGGER_HEAL:
- gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered"
- " manually. Start crawling");
- call_psh = 1;
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ goto out;
break;
default:
@@ -3548,18 +4023,13 @@ afr_notify (xlator_t *this, int32_t event,
}
}
UNLOCK (&priv->lock);
- if (up_children > 1) {
- gf_log (this->name, GF_LOG_INFO, "All subvolumes came "
- "up, start crawl");
- call_psh = 1;
- }
}
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
- if (call_psh)
- afr_proactive_self_heal (this, up_child);
+ if (call_psh && priv->shd.iamshd)
+ afr_proactive_self_heal ((void*) (long) up_child);
out:
return ret;
@@ -3591,8 +4061,8 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
local->op_ret = -1;
local->op_errno = EUCLEAN;
- local->child_up = GF_CALLOC (sizeof (*local->child_up),
- priv->child_count,
+ local->child_up = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_up),
gf_afr_mt_char);
if (!local->child_up) {
if (op_errno)
@@ -3610,6 +4080,27 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
*op_errno = ENOTCONN;
goto out;
}
+
+ local->child_errno = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_errno),
+ gf_afr_mt_int32_t);
+ if (!local->child_errno) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->transaction.postop_piggybacked = GF_CALLOC (priv->child_count,
+ sizeof (int),
+ gf_afr_mt_int32_t);
+ if (!local->transaction.postop_piggybacked) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->append_write = _gf_false;
+
ret = 0;
out:
return ret;
@@ -3621,16 +4112,6 @@ afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
{
int ret = -ENOMEM;
- lk->inode_locked_nodes = GF_CALLOC (sizeof (*lk->inode_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->inode_locked_nodes)
- goto out;
-
- lk->entry_locked_nodes = GF_CALLOC (sizeof (*lk->entry_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->entry_locked_nodes)
- goto out;
-
lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
child_count, gf_afr_mt_char);
if (NULL == lk->locked_nodes)
@@ -3650,10 +4131,62 @@ out:
return ret;
}
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+ int i = 0;
+
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE (matrix[i]);
+ }
+
+ GF_FREE (matrix);
+out:
+ return;
+}
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n,
+ gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
+out:
+ afr_matrix_cleanup (matrix, m);
+ return NULL;
+}
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+ int ret = -ENOMEM;
+
+ lk->domain = dom;
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
+
int
afr_transaction_local_init (afr_local_t *local, xlator_t *this)
{
- int i = 0;
int child_up_count = 0;
int ret = -ENOMEM;
afr_private_t *priv = NULL;
@@ -3664,6 +4197,14 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (ret < 0)
goto out;
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
ret = -ENOMEM;
child_up_count = afr_up_children_count (local->child_up,
priv->child_count);
@@ -3673,12 +4214,6 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
local->first_up_child = afr_first_up_child (local->child_up,
priv->child_count);
- local->child_errno = GF_CALLOC (sizeof (*local->child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!local->child_errno)
- goto out;
-
local->transaction.eager_lock =
GF_CALLOC (sizeof (*local->transaction.eager_lock),
priv->child_count,
@@ -3687,44 +4222,27 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->transaction.eager_lock)
goto out;
- local->pending = GF_CALLOC (sizeof (*local->pending),
- priv->child_count,
- gf_afr_mt_int32_t);
-
- if (!local->pending)
- goto out;
-
local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children)
goto out;
- if (local->fd) {
- local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->fd_open_on)
- goto out;
- }
-
local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
priv->child_count,
gf_afr_mt_char);
if (!local->transaction.pre_op)
goto out;
- for (i = 0; i < priv->child_count; i++) {
- local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),
- 3, /* data + metadata + entry */
- gf_afr_mt_int32_t);
- if (!local->pending[i])
- goto out;
- }
+ local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
- local->transaction.child_errno =
- GF_CALLOC (sizeof (*local->transaction.child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- local->transaction.erase_pending = 1;
+ local->transaction.txn_changelog = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->transaction.txn_changelog)
+ goto out;
+
+ INIT_LIST_HEAD (&local->transaction.eager_locked);
ret = 0;
out:
@@ -3820,7 +4338,7 @@ afr_set_low_priority (call_frame_t *frame)
int
afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags, int32_t wbflags)
+ int flags)
{
int ret = 0;
uint64_t ctx = 0;
@@ -3845,7 +4363,6 @@ afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
fd_ctx->opened_on[child] = AFR_FD_OPENED;
if (!IA_ISDIR (fd->inode->ia_type)) {
fd_ctx->flags = flags;
- fd_ctx->wbflags = wbflags;
}
ret = 0;
out:
@@ -3898,6 +4415,32 @@ afr_priv_destroy (afr_private_t *priv)
goto out;
inode_unref (priv->root_inode);
GF_FREE (priv->shd.pos);
+ GF_FREE (priv->shd.pending);
+ GF_FREE (priv->shd.inprogress);
+// for (i = 0; i < priv->child_count; i++)
+// if (priv->shd.timer && priv->shd.timer[i])
+// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
+ GF_FREE (priv->shd.timer);
+
+ if (priv->shd.healed)
+ eh_destroy (priv->shd.healed);
+
+ if (priv->shd.heal_failed)
+ eh_destroy (priv->shd.heal_failed);
+
+ if (priv->shd.split_brain)
+ eh_destroy (priv->shd.split_brain);
+
+ for (i = 0; i < priv->child_count; i++)
+ {
+ if (priv->shd.statistics[i])
+ eh_destroy (priv->shd.statistics[i]);
+ }
+
+ GF_FREE (priv->shd.statistics);
+
+ GF_FREE (priv->shd.crawl_events);
+
GF_FREE (priv->last_event);
if (priv->pending_key) {
for (i = 0; i < priv->child_count; i++)
@@ -3924,3 +4467,125 @@ xlator_subvolume_count (xlator_t *this)
i++;
return i;
}
+
+inline gf_boolean_t
+afr_is_errno_set (int *child_errno, int child)
+{
+ return child_errno[child];
+}
+
+inline gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child)
+{
+ return !afr_is_errno_set (child_errno, child);
+}
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count)
+{
+ int midx = 0;
+ int idx = 0;
+ int i = 0;
+
+ midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ if (IA_ISDIR (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ else if (IA_ISREG (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ else
+ idx = -1;
+ for (i = 0; i < child_count; i++) {
+ if (is_pending (ctx, i)) {
+ pending[i][midx] = hton32 (1);
+ if (idx == -1)
+ continue;
+ pending[i][idx] = hton32 (1);
+ }
+ }
+}
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ inode_t *inode = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ if (local->fd)
+ inode = local->fd->inode;
+ else
+ inode = local->loc.inode;
+
+ if (!inode)
+ return;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ ctx->open_fd_count = local->open_fd_count;
+ }
+ UNLOCK (&inode->lock);
+}
+
+int
+afr_initialise_statistics (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int i = 0;
+ int child_count = 0;
+ eh_t *stats_per_brick = NULL;
+ shd_crawl_event_t ***shd_crawl_events = NULL;
+ priv = this->private;
+
+ priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!priv->shd.statistics) {
+ ret = -1;
+ goto out;
+ }
+ child_count = priv->child_count;
+ for (i=0; i < child_count ; i++) {
+ stats_per_brick = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+ _gf_false,
+ _destroy_crawl_event_data);
+ if (!stats_per_brick) {
+ ret = -1;
+ goto out;
+ }
+ priv->shd.statistics[i] = stats_per_brick;
+
+ }
+
+ shd_crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events);
+ *shd_crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*),
+ priv->child_count,
+ gf_afr_mt_shd_crawl_event_t);
+
+ if (!priv->shd.crawl_events) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index d41102e7e..689dd84e6 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -51,7 +42,7 @@
int
afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, int32_t sh_failed)
{
afr_local_t *local = NULL;
@@ -60,7 +51,7 @@ afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
afr_set_opendir_done (this, local->fd->inode);
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
return 0;
}
@@ -99,7 +90,7 @@ __checksums_differ (uint32_t *checksum, int child_count,
int32_t
afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -137,7 +128,7 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
}
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum (entry->d_name,
+ entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
strlen (entry->d_name));
local->cont.opendir.checksum[child_index] ^= entry_cksum;
}
@@ -152,7 +143,7 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset);
+ local->fd, 131072, last_offset, NULL);
return 0;
@@ -175,7 +166,7 @@ out:
afr_set_opendir_done (this, inode);
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
}
}
@@ -208,7 +199,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->readdir,
- local->fd, 131072, 0);
+ local->fd, 131072, 0, NULL);
if (!--call_count)
break;
@@ -222,7 +213,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
int32_t
afr_opendir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -242,8 +233,7 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
{
if (op_ret >= 0) {
local->op_ret = op_ret;
- ret = afr_child_fd_ctx_set (this, fd, child_index,
- 0, 0);
+ ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
if (ret) {
local->op_ret = -1;
local->op_errno = -ret;
@@ -263,7 +253,7 @@ unlock:
goto out;
if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
+ up_children_count > 1 && priv->entry_self_heal) {
/*
* This is the first opendir on this inode. We need
@@ -291,7 +281,7 @@ unlock:
out:
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
return 0;
}
@@ -317,7 +307,7 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
child_count = priv->child_count;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -336,7 +326,7 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
(void*) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- loc, fd);
+ loc, fd, NULL);
if (!--call_count)
break;
@@ -346,7 +336,7 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd);
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
return 0;
}
@@ -368,85 +358,6 @@ struct entry_name {
struct list_head list;
};
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
-{
- struct entry_name *e = NULL;
- gf_boolean_t ret = _gf_false;
-
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
-
-out:
- return ret;
-}
-
-
-static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
-{
- struct entry_name *n = NULL;
- gf_dirent_t *entry = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
-
- list_add (&n->list, &fd_ctx->entries);
- }
-}
-
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
-{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
-
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
- }
-
- return offset;
-}
-
-
static void
afr_forget_entries (fd_t *fd)
{
@@ -472,173 +383,70 @@ afr_forget_entries (fd_t *fd)
}
}
-
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+static void
+afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
{
- afr_local_t * local = NULL;
gf_dirent_t * entry = NULL;
gf_dirent_t * tmp = NULL;
- local = frame->local;
-
- if (op_ret == -1)
- goto out;
-
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
list_del_init (&entry->list);
GF_FREE (entry);
}
}
-
-out:
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
-
- return 0;
}
-
int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+afr_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int32_t next_call_child = -1;
- int ret = 0;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- int32_t *last_index = NULL;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
- int32_t call_child = -1;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
+ if (op_ret == -1)
+ goto out;
local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
- read_child = (long) cookie;
- last_index = &local->cont.readdir.last_index;
- fresh_children = local->fresh_children;
-
- /* the value of the last_index changes if afr_next_call_child is
- * called. So to find the call_child of this callback use last_index
- * before the next_call_child call.
- */
- if (*last_index == -1)
- call_child = read_child;
- else
- call_child = fresh_children[*last_index];
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (op_ret == -1) {
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index,
- read_child);
- if (next_call_child < 0)
- goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- next_call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readdirp,
- local->fd,
- local->cont.readdir.size, 0);
- return 0;
- }
- }
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no entries found");
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
+out:
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
+ return 0;
+}
- afr_remember_entries (entries, local->fd);
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that will make the application stop reading. So
- try to get more entries */
+int32_t
+afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries "
- "from offset %"PRId64", child %s",
- offset, children[call_child]->name);
+ if (op_ret == -1)
+ goto out;
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[call_child],
- children[call_child]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
- }
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries);
-
+ AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
return 0;
}
int32_t
afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
+ fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -647,7 +455,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
children = priv->children;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -663,76 +471,66 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
read_child = afr_inode_get_read_ctx (this, fd->inode,
local->fresh_children);
ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
if (ret < 0) {
op_errno = -ret;
goto out;
}
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, "
- "restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
-
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
- fd_ctx->last_tried = call_child;
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
+ goto out;
}
+ local->fd = fd_ref (fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
+
if (whichop == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdir, fd,
- size, offset);
+ size, offset, dict);
else
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdirp, fd,
- size, offset);
+ size, offset, dict);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
return 0;
}
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 6a6bc6354..09456d159 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,23 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 4d2fcd226..1943b719b 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -47,31 +38,222 @@
#include "afr.h"
#include "afr-transaction.h"
-void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
{
- char *tmp = NULL;
+ int ret = -1;
+ char *child_path = NULL;
if (!child->parent) {
- //this should never be called with root as the child
- GF_ASSERT (0);
- loc_copy (parent, child);
- return;
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- tmp = gf_strdup (child->path);
- parent->path = gf_strdup (dirname (tmp));
- GF_FREE (tmp);
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->path = gf_strdup( dirname (child_path) );
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
+ ret = 0;
+out:
+ GF_FREE(child_path);
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
+ return ret;
+}
+
+void
+__dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *prenewparent,
+ struct iatt *postnewparent)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno))
+ afr_transaction_fop_failed (frame, this, child_index);
+
+ if (op_ret > -1) {
+ local->op_ret = op_ret;
+
+ if ((local->success_count == 0) ||
+ (child_index == local->read_child_index)) {
+ local->cont.dir_fop.preparent = *preparent;
+ local->cont.dir_fop.postparent = *postparent;
+ if (buf)
+ local->cont.dir_fop.buf = *buf;
+ if (prenewparent)
+ local->cont.dir_fop.prenewparent = *prenewparent;
+ if (postnewparent)
+ local->cont.dir_fop.postnewparent = *postnewparent;
+ }
+
+ local->cont.dir_fop.inode = inode;
+
+ local->fresh_children[local->success_count] = child_index;
+ local->success_count++;
+ local->child_errno[child_index] = 0;
+ } else {
+ local->child_errno[child_index] = op_errno;
+ }
+
+ local->op_errno = op_errno;
+}
+
+int
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = 0;
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ AFR_STACK_DESTROY (frame);
+ }
+ return 0;
+}
+
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ GF_UNUSED int op_errno = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
+ new_local = new_frame->local;
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
+ gf_afr_mt_dict_t);
+ if (!xattr)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+ xattr[i] = dict_new ();
+ if (!xattr[i])
+ goto out;
+ }
+
+ afr_prepare_new_entry_pending_matrix (changelog,
+ afr_is_errno_set,
+ local->child_errno,
+ &local->cont.dir_fop.buf,
+ priv->child_count);
+
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);
+ new_local->call_count = local->success_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+
+ afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr[i], NULL);
+ }
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ afr_xattr_array_destroy (xattr, priv->child_count);
+ return;
+}
+
+gf_boolean_t
+afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
+{
+ glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL};
+ int i = 0;
+
+ for (i = 0; fops[i] != GF_FOP_NULL; i++) {
+ if (fop == fops[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->op_ret < 0)
+ goto out;
+
+ if (local->success_count == priv->child_count)
+ goto out;
+
+ if (!afr_is_new_entry_changelog_needed (local->op))
+ goto out;
- if (!uuid_is_null (child->pargfid))
- uuid_copy (parent->gfid, child->pargfid);
+ afr_mark_new_entry_changelog (frame, this);
+
+out:
+ return;
+}
+
+void
+afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->cont.dir_fop.inode == NULL)
+ goto done;
+ afr_set_read_ctx_from_policy (this, local->cont.dir_fop.inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child,
+ local->cont.dir_fop.buf.ia_gfid);
+done:
+ local->transaction.unwind (frame, this);
+ afr_dir_fop_mark_entry_pending_changelog (frame, this);
+ local->transaction.resume (frame, this);
}
/* {{{ create */
@@ -81,7 +263,6 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -95,18 +276,14 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
AFR_STACK_UNWIND (create, main_frame,
local->op_ret, local->op_errno,
local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ local->xdata_rsp);
}
return 0;
@@ -117,32 +294,24 @@ int
afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
+ if (op_ret > -1) {
ret = afr_fd_ctx_set (this, fd);
-
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not set ctx on fd=%p", fd);
@@ -153,7 +322,6 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
ret = fd_ctx_get (fd, this, &ctx);
-
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not get fd ctx for fd=%p", fd);
@@ -167,23 +335,14 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
fd_ctx->flags = local->cont.create.flags;
- if (local->success_count == 0)
- local->cont.create.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
-
- local->cont.create.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
+ if (local->success_count == 0) {
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
}
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
unlock:
@@ -191,15 +350,8 @@ unlock:
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -235,8 +387,9 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
&local->loc,
local->cont.create.flags,
local->cont.create.mode,
+ local->umask,
local->cont.create.fd,
- local->cont.create.params);
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -264,13 +417,14 @@ afr_create_done (call_frame_t *frame, xlator_t *this)
int
afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -286,7 +440,7 @@ afr_create (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -302,22 +456,41 @@ afr_create (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_CREATE;
local->cont.create.flags = flags;
local->cont.create.mode = mode;
local->cont.create.fd = fd_ref (fd);
+ local->umask = umask;
if (params)
- local->cont.create.params = dict_ref (params);
+ local->xdata_req = dict_ref (params);
local->transaction.fop = afr_create_wind;
local->transaction.done = afr_create_done;
local->transaction.unwind = afr_create_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -325,7 +498,7 @@ out:
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -340,7 +513,6 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -354,17 +526,13 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
-
AFR_STACK_UNWIND (mknod, main_frame,
local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -375,58 +543,25 @@ int
afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mknod.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -460,7 +595,8 @@ afr_mknod_wind (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->mknod,
&local->loc, local->cont.mknod.mode,
local->cont.mknod.dev,
- local->cont.mknod.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -485,14 +621,15 @@ afr_mknod_done (call_frame_t *frame, xlator_t *this)
int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -508,7 +645,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -524,21 +661,40 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_MKNOD;
local->cont.mknod.mode = mode;
local->cont.mknod.dev = dev;
+ local->umask = umask;
if (params)
- local->cont.mknod.params = dict_ref (params);
+ local->xdata_req = dict_ref (params);
local->transaction.fop = afr_mknod_wind;
local->transaction.done = afr_mknod_done;
local->transaction.unwind = afr_mknod_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -546,7 +702,7 @@ out:
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -562,7 +718,6 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -576,17 +731,13 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
AFR_STACK_UNWIND (mkdir, main_frame,
local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -597,58 +748,25 @@ int
afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mkdir.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -682,7 +800,8 @@ afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->mkdir,
&local->loc, local->cont.mkdir.mode,
- local->cont.mkdir.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -706,16 +825,16 @@ afr_mkdir_done (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -731,7 +850,7 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -748,19 +867,38 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
UNLOCK (&priv->read_child_lock);
local->cont.mkdir.mode = mode;
+ local->umask = umask;
if (params)
- local->cont.mkdir.params = dict_ref (params);
+ local->xdata_req = dict_ref (params);
+ local->op = GF_FOP_MKDIR;
local->transaction.fop = afr_mkdir_wind;
local->transaction.done = afr_mkdir_done;
local->transaction.unwind = afr_mkdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -769,7 +907,7 @@ out:
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -785,7 +923,6 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -799,17 +936,13 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
AFR_STACK_UNWIND (link, main_frame,
local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -820,59 +953,25 @@ int
afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -901,11 +1000,12 @@ afr_link_wind (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->link,
&local->loc,
- &local->newloc);
+ &local->newloc, local->xdata_req);
if (!--call_count)
break;
@@ -931,13 +1031,14 @@ afr_link_done (call_frame_t *frame, xlator_t *this)
int
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -953,7 +1054,7 @@ afr_link (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -962,6 +1063,8 @@ afr_link (call_frame_t *frame, xlator_t *this,
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
LOCK (&priv->read_child_lock);
{
@@ -970,25 +1073,41 @@ afr_link (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_LINK;
local->transaction.fop = afr_link_wind;
local->transaction.done = afr_link_done;
local->transaction.unwind = afr_link_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1004,7 +1123,6 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -1018,17 +1136,13 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
AFR_STACK_UNWIND (symlink, main_frame,
local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1039,58 +1153,25 @@ int
afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.symlink.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1125,7 +1206,8 @@ afr_symlink_wind (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->symlink,
local->cont.symlink.linkpath,
&local->loc,
- local->cont.symlink.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
@@ -1152,13 +1234,14 @@ afr_symlink_done (call_frame_t *frame, xlator_t *this)
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1174,7 +1257,7 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1191,19 +1274,38 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
UNLOCK (&priv->read_child_lock);
local->cont.symlink.linkpath = gf_strdup (linkpath);
+ local->umask = umask;
if (params)
- local->cont.symlink.params = dict_ref (params);
+ local->xdata_req = dict_ref (params);
+ local->op = GF_FOP_SYMLINK;
local->transaction.fop = afr_symlink_wind;
local->transaction.done = afr_symlink_done;
local->transaction.unwind = afr_symlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1211,7 +1313,7 @@ out:
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (symlink, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1226,7 +1328,6 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -1240,19 +1341,14 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
AFR_STACK_UNWIND (rename, main_frame,
local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent);
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent,
+ NULL);
}
return 0;
@@ -1263,7 +1359,8 @@ int
afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
int call_count = -1;
@@ -1277,38 +1374,22 @@ afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
afr_transaction_fop_failed (frame, this, child_index);
+ local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent);
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1342,7 +1423,7 @@ afr_rename_wind (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->rename,
&local->loc,
- &local->newloc);
+ &local->newloc, NULL);
if (!--call_count)
break;
}
@@ -1367,13 +1448,15 @@ afr_rename_done (call_frame_t *frame, xlator_t *this)
int
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1389,7 +1472,7 @@ afr_rename (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1401,18 +1484,61 @@ afr_rename (call_frame_t *frame, xlator_t *this,
local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
+ local->op = GF_FOP_RENAME;
local->transaction.fop = afr_rename_wind;
local->transaction.done = afr_rename_done;
local->transaction.unwind = afr_rename_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (oldloc->path);
local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ }
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1421,7 +1547,7 @@ out:
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (rename, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1451,8 +1577,9 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (unlink, main_frame,
local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent);
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1462,7 +1589,7 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
int
afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_local_t * local = NULL;
int call_count = -1;
@@ -1475,36 +1602,15 @@ afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (child_index == local->read_child_index) {
local->read_child_returned = _gf_true;
}
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1537,7 +1643,8 @@ afr_unlink_wind (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->unlink,
- &local->loc);
+ &local->loc, local->xflag,
+ local->xdata_req);
if (!--call_count)
break;
@@ -1563,13 +1670,14 @@ afr_unlink_done (call_frame_t *frame, xlator_t *this)
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1585,7 +1693,7 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1593,17 +1701,38 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
goto out;
loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+ local->op = GF_FOP_UNLINK;
local->transaction.fop = afr_unlink_wind;
local->transaction.done = afr_unlink_done;
local->transaction.unwind = afr_unlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1611,7 +1740,7 @@ out:
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (unlink, frame, -1, op_errno,
- NULL, NULL);
+ NULL, NULL, NULL);
}
return 0;
@@ -1643,8 +1772,9 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (rmdir, main_frame,
local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent);
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1654,7 +1784,7 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
int
afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_local_t * local = NULL;
int call_count = -1;
@@ -1668,36 +1798,22 @@ afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
-
if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL,
+ NULL);
+
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1730,7 +1846,8 @@ afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags);
+ &local->loc, local->cont.rmdir.flags,
+ NULL);
if (!--call_count)
break;
@@ -1756,13 +1873,15 @@ afr_rmdir_done (call_frame_t *frame, xlator_t *this)
int
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1778,7 +1897,7 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1788,23 +1907,53 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
local->cont.rmdir.flags = flags;
loc_copy (&local->loc, loc);
+ local->op = GF_FOP_RMDIR;
local->transaction.fop = afr_rmdir_wind;
local->transaction.done = afr_rmdir_done;
local->transaction.unwind = afr_rmdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 0290c6350..02f0a3682 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
@@ -23,38 +14,34 @@
int32_t
afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params);
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params);
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
int32_t
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, dict_t *params);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+ const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 162c7eed4..e06e3b2f2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -58,7 +49,7 @@
int32_t
afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -92,12 +83,13 @@ afr_access_cbk (call_frame_t *frame, void *cookie,
(void *) (long) read_child,
children[next_call_child],
children[next_call_child]->fops->access,
- &local->loc, local->cont.access.mask);
+ &local->loc, local->cont.access.mask,
+ NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
}
return 0;
@@ -105,7 +97,8 @@ out:
int32_t
-afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
xlator_t **children = NULL;
@@ -124,7 +117,9 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
children = priv->children;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -156,12 +151,12 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->access,
- loc, mask);
+ loc, mask, xdata);
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (access, frame, -1, op_errno);
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
return 0;
}
@@ -173,7 +168,7 @@ out:
int32_t
afr_stat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -207,12 +202,12 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,
(void *) (long) read_child,
children[next_call_child],
children[next_call_child]->fops->stat,
- &local->loc);
+ &local->loc, NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf);
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
}
return 0;
@@ -220,7 +215,7 @@ out:
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -239,7 +234,9 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
children = priv->children;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -267,12 +264,12 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
children[call_child],
children[call_child]->fops->stat,
- loc);
+ loc, xdata);
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -284,7 +281,8 @@ out:
int32_t
afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -318,12 +316,12 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(void *) (long) read_child,
children[next_call_child],
children[next_call_child]->fops->fstat,
- local->fd);
+ local->fd, NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
}
return 0;
@@ -332,7 +330,7 @@ out:
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -354,7 +352,9 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd->inode, out);
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -383,20 +383,17 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
local->fd = fd_ref (fd);
- ret = afr_open_fd_fix (frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_open_fd_fix (fd, this);
+
STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
children[call_child],
children[call_child]->fops->fstat,
- fd);
+ fd, xdata);
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -408,7 +405,7 @@ out:
int32_t
afr_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf)
+ const char *buf, struct iatt *sbuf, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -442,12 +439,13 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,
children[next_call_child],
children[next_call_child]->fops->readlink,
&local->loc,
- local->cont.readlink.size);
+ local->cont.readlink.size, NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf);
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf,
+ xdata);
}
return 0;
@@ -456,7 +454,7 @@ out:
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
afr_private_t *priv = NULL;
xlator_t **children = NULL;
@@ -475,7 +473,9 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -506,12 +506,12 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readlink,
- loc, size);
+ loc, size, xdata);
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -526,7 +526,7 @@ struct _xattr_key {
};
-void
+int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -538,13 +538,14 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
@@ -574,7 +575,7 @@ __filter_xattrs (dict_t *dict)
int32_t
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -608,7 +609,8 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,
children[next_call_child],
children[next_call_child]->fops->getxattr,
&local->loc,
- local->cont.getxattr.name);
+ local->cont.getxattr.name,
+ NULL);
}
out:
@@ -616,39 +618,620 @@ out:
if (op_ret >= 0 && dict)
__filter_xattrs (dict);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
}
return 0;
}
int32_t
-afr_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+ xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+int32_t
+afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+/**
+ * node-uuid cbk uses next child querying mechanism
+ */
+int32_t
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all childs and yet no success; give up !
+ */
+ curr_call_child = (int) ((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
+ unwind:
+ if (unwind)
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ NULL);
+
+ return 0;
+}
+
+int32_t
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len == 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len <= 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
return 0;
}
int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0))
+ goto out;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto out;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
+ goto out;
+ }
+
+ local->cont.getxattr.xattr_len
+ += strlen (xattr) + 1;
+ }
+ }
+out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name)
+ + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz
+ + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+ return ret;
+}
+
+int32_t
afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
- int ret = 0;
- char *pathinfo = NULL;
- char *pathinfo_serz = NULL;
- char pathinfo_cky[1024] = {0,};
- dict_t *xattr = NULL;
- long cky = 0;
- int32_t padding = 0;
- int32_t tlen = 0;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
if (!frame || !frame->local || !this) {
- gf_log (this->name, GF_LOG_ERROR, "possible NULL deref");
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
goto out;
}
@@ -666,90 +1249,222 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
local->dict = dict_new ();
if (local->dict) {
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
if (ret)
goto out;
- pathinfo = gf_strdup (pathinfo);
+ xattr = gf_strdup (xattr);
- snprintf (pathinfo_cky, 1024, "%s-%ld", GF_XATTR_PATHINFO_KEY, cky);
- ret = dict_set_dynstr (local->dict, pathinfo_cky, pathinfo);
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo cookie key");
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
goto out;
}
- local->cont.getxattr.pathinfo_len += strlen (pathinfo) + 1;
+ local->cont.getxattr.xattr_len += strlen (xattr) + 1;
}
}
out:
UNLOCK (&frame->lock);
if (!callcnt) {
- if (!local->cont.getxattr.pathinfo_len)
+ if (!local->cont.getxattr.xattr_len)
goto unwind;
- xattr = dict_new ();
- if (!xattr)
+ nxattr = dict_new ();
+ if (!nxattr)
goto unwind;
/* extra bytes for decorations (brackets and <>'s) */
- padding = strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
- local->cont.getxattr.pathinfo_len += (padding + 2);
+ padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
- pathinfo_serz = GF_CALLOC (local->cont.getxattr.pathinfo_len, sizeof (char),
- gf_common_mt_char);
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
- if (!pathinfo_serz)
+ if (!xattr_serz)
goto unwind;
/* the xlator info */
- sprintf (pathinfo_serz, "(<"AFR_PATHINFO_HEADER"%s> ", this->name);
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
/* actual series of pathinfo */
- ret = dict_serialize_value_with_delim (local->dict, pathinfo_serz + strlen (pathinfo_serz),
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz + strlen (xattr_serz),
&tlen, ' ');
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Error serializing dictionary");
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
goto unwind;
}
/* closing part */
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
- ret = dict_set_dynstr (xattr, GF_XATTR_PATHINFO_KEY, pathinfo_serz);
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo key in dict");
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
unwind:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
- if (local->dict)
- dict_unref (local->dict);
-
- if (xattr)
- dict_unref (xattr);
+ if (nxattr)
+ dict_unref (nxattr);
}
return ret;
}
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+ int ret = 0;
+
+ if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_min_stime (THIS, data, key, value);
+
+ return ret;
+}
+
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref (dict, NULL);
+ else
+ dict_foreach (dict, afr_aggregate_stime_xattr,
+ local->dict);
+ local->op_ret = 0;
+ }
+
+cleanup:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, local->dict, xdata);
+ }
+
+out:
+ return 0;
+}
+
+
+static gf_boolean_t
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- xlator_t **sub_volumes = NULL;
- int i = 0;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
+ gf_boolean_t is_spl = _gf_true;
+ GF_ASSERT (cbk);
+ if (!cbk) {
+ is_spl = _gf_false;
+ goto out;
+ }
+
+ if (!strcmp (name, GF_XATTR_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+ strlen (GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
+ } else {
+ is_spl = _gf_false;
+ }
+
+out:
+ return is_spl;
+}
+
+static void
+afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, loc_t *loc,
+ fop_getxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->getxattr,
+ loc, name, NULL);
+ }
+ return;
+}
+
+int32_t
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t **sub_volumes = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+ int afr_xtime_gauge[MCNT_MAX] = {0,};
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -760,7 +1475,9 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -768,38 +1485,98 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
goto out;
loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
+ if (!name)
+ goto no_name;
+
+ local->cont.getxattr.name = gf_strdup (name);
+
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no data present for key %s",
+ loc->path, name);
+ op_errno = ENODATA;
+ goto out;
+ }
+ if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_UUID_TYPE,
+ marker_uuid_default_gauge,
+ priv->vol_uuid)) {
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
gf_log (this->name, GF_LOG_INFO,
- "%s: no data present for key %s",
+ "%s: failed to get marker attr (%s)",
loc->path, name);
- op_errno = ENODATA;
+ op_errno = EINVAL;
goto out;
}
- if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ return 0;
+ }
+
+ /*
+ * if we are doing getxattr with pathinfo as the key then we
+ * collect information from all childs
+ */
+ if (afr_is_special_xattr (name, &cbk, 0)) {
+ afr_getxattr_frm_all_children (this, frame, name,
+ loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID (name)) {
+ i = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+ if (*priv->vol_uuid) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ sub_volumes = alloca ( priv->child_count
+ * sizeof (xlator_t *));
for (i = 0, trav = this->children; trav ;
trav = trav->next, i++) {
*(sub_volumes + i) = trav->xlator;
+
}
- if (cluster_getmarkerattr (frame, this, loc, name,
- local, afr_getxattr_unwind,
+ /* don't err out on getting ENOTCONN (brick down)
+ * from a subset of the bricks
+ */
+ memcpy (afr_xtime_gauge, marker_xtime_default_gauge,
+ sizeof (afr_xtime_gauge));
+ afr_xtime_gauge[MCNT_NOTFOUND] = 0;
+ afr_xtime_gauge[MCNT_ENOTCONN] = 0;
+ if (cluster_getmarkerattr (frame, this, loc,
+ name, local,
+ afr_getxattr_unwind,
sub_volumes,
priv->child_count,
- MARKER_UUID_TYPE,
+ MARKER_XTIME_TYPE,
+ afr_xtime_gauge,
priv->vol_uuid)) {
-
gf_log (this->name, GF_LOG_INFO,
"%s: failed to get marker attr (%s)",
loc->path, name);
@@ -809,84 +1586,209 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
+ }
- /*
- * if we are doing getxattr with pathinfo as the key then we
- * collect information from all childs
- */
- if (strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0) {
-
- local->call_count = priv->child_count;
- for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, afr_getxattr_pathinfo_cbk,
- (void *) (long) i,
- children[i], children[i]->fops->getxattr,
- loc, name);
- }
+no_name:
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- return 0;
- }
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- if (*priv->vol_uuid) {
- if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->getxattr,
+ loc, name, xdata);
- local->marker.call_count = priv->child_count;
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
+/* {{{ fgetxattr */
- *(sub_volumes + i) = trav->xlator;
- }
+int32_t
+afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- if (cluster_getmarkerattr (frame, this, loc,
- name, local,
- afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_XTIME_TYPE,
- priv->vol_uuid)) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to get marker attr (%s)",
- loc->path, name);
- op_errno = EINVAL;
- goto out;
- }
+ priv = this->private;
+ children = priv->children;
- return 0;
- }
- }
+ local = frame->local;
+
+ read_child = (long) cookie;
+
+ if (op_ret == -1) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->fgetxattr,
+ local->fd,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret >= 0 && dict)
+ __filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+static void
+afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, fd_t *fd,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->fgetxattr,
+ fd, name, NULL);
}
+ return;
+}
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ frame->local = local;
+
+ op_ret = afr_local_init (local, priv, &op_errno);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ if (name)
+ local->cont.getxattr.name = gf_strdup (name);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_frm_all_children (this, frame, name,
+ fd, cbk);
+ return 0;
+ }
+
+
local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
&local->cont.getxattr.last_index);
- if (ret < 0) {
- op_errno = -ret;
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
goto out;
}
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
(void *) (long) call_child,
children[call_child],
- children[call_child]->fops->getxattr,
- loc, name);
+ children[call_child]->fops->fgetxattr,
+ fd, name, xdata);
- ret = 0;
+ op_ret = 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL,
+ NULL);
+ }
return 0;
}
@@ -911,7 +1813,7 @@ int32_t
afr_readv_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -952,13 +1854,15 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
children[next_call_child],
children[next_call_child]->fops->readv,
local->fd, local->cont.readv.size,
- local->cont.readv.offset);
+ local->cont.readv.offset,
+ local->cont.readv.flags,
+ NULL);
}
out:
if (unwind) {
AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
+ vector, count, buf, iobref, xdata);
}
return 0;
@@ -967,7 +1871,7 @@ out:
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -985,7 +1889,9 @@ afr_readv (call_frame_t *frame, xlator_t *this,
priv = this->private;
children = priv->children;
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1012,23 +1918,21 @@ afr_readv (call_frame_t *frame, xlator_t *this,
local->cont.readv.size = size;
local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+
+ afr_open_fd_fix (fd, this);
- ret = afr_open_fd_fix (frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
STACK_WIND_COOKIE (frame, afr_readv_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
ret = 0;
out:
if (ret < 0) {
AFR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL,
- NULL);
+ NULL, NULL);
}
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 5479cfbd5..e4091a793 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_READ_H__
@@ -22,26 +13,30 @@
int32_t
afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask);
+ loc_t *loc, int32_t mask, dict_t *xdata);
int32_t
afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size);
+ loc_t *loc, size_t size, dict_t *xdata);
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 48094931f..c1ec69a55 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -48,45 +39,151 @@
#include "afr-transaction.h"
#include "afr-self-heal-common.h"
+void
+__inode_write_fop_cbk (call_frame_t *frame, int child_index, int read_child,
+ xlator_t *this, int32_t *op_ret, int32_t *op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (*op_ret, *op_errno)) {
+ local->child_errno[child_index] = *op_errno;
+
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ if (*op_errno != EFBIG)
+ afr_transaction_fop_failed (frame, this,
+ child_index);
+ break;
+ default:
+ afr_transaction_fop_failed (frame, this, child_index);
+ break;
+ }
+ local->op_errno = *op_errno;
+ goto out;
+ }
+
+ if ((local->success_count == 0) || (read_child == child_index)) {
+ local->op_ret = *op_ret;
+ if (prebuf)
+ local->cont.inode_wfop.prebuf = *prebuf;
+ if (postbuf)
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+out:
+ return;
+}
+
/* {{{ writev */
-int
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
+{
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+}
+
+void
afr_writev_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ local = frame->local;
+
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+}
+
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
local = frame->local;
LOCK (&frame->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
+ fop_frame = local->transaction.main_frame;
local->transaction.main_frame = NULL;
}
UNLOCK (&frame->lock);
- if (main_frame) {
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf);
+ return fop_frame;
+}
+
+int
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *fop_frame = NULL;
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame);
+ afr_writev_unwind (fop_frame, this);
}
return 0;
}
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) ||
+ (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
+ }
+}
int
afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *fop_frame = NULL;
int child_index = (long) cookie;
int call_count = -1;
int read_child = 0;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
local = frame->local;
+ priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
@@ -96,32 +193,81 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+
+ /* stage the best case return value for unwind */
+ if ((local->success_count == 0) || (op_ret > local->op_ret)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret != -1) {
+ if (xdata) {
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if ((ret == 0) &&
+ (open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
}
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
- }
-
- local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ afr_writev_handle_short_writes (frame, this);
+ if (afr_any_fops_failed (local, priv)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
+ }
}
return 0;
}
@@ -133,6 +279,8 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int i = 0;
int call_count = -1;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = 0;
local = frame->local;
priv = this->private;
@@ -146,6 +294,28 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ local->transaction.unwind(frame, this);
+ local->transaction.resume(frame, this);
+ return 0;
+ }
+
+ xdata = dict_new ();
+ if (xdata) {
+ ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ sizeof (uint32_t));
+ ret = dict_set_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ 0);
+ /* Set append_write to be true speculatively. If on any
+ server it turns not be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
@@ -157,13 +327,18 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
local->cont.writev.vector,
local->cont.writev.count,
local->cont.writev.offset,
- local->cont.writev.iobref);
+ local->cont.writev.flags,
+ local->cont.writev.iobref,
+ xdata);
if (!--call_count)
break;
}
}
+ if (xdata)
+ dict_unref (xdata);
+
return 0;
}
@@ -203,7 +378,7 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
}
transaction_frame->local = local;
- frame->local = NULL;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local->op = GF_FOP_WRITE;
@@ -211,10 +386,17 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->transaction.fop = afr_writev_wind;
local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
+ local->transaction.unwind = afr_transaction_writev_unwind;
local->transaction.main_frame = frame;
if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency gurantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
local->transaction.start = 0;
local->transaction.len = 0;
} else {
@@ -223,156 +405,91 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->cont.writev.count);
}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
}
return 0;
}
-static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
+static void
+afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this)
{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
-
- if ((!fd) || (!fd->inode))
- return -1;
-
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
-
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
-
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
-
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
-
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ char *reason = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+
+ if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: "
+ "fd: %p, inode: %p", fd,
+ fd ? fd->inode : NULL);
+ goto out;
}
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
-
- return 0;
-}
-
-afr_fd_paused_call_t*
-afr_paused_call_create (call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_fd_paused_call_t *paused_call = NULL;
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
- GF_ASSERT (local->fop_call_continue);
-
- paused_call = GF_CALLOC (1, sizeof (*paused_call),
- gf_afr_fd_paused_call_t);
- if (paused_call) {
- INIT_LIST_HEAD (&paused_call->call_list);
- paused_call->frame = frame;
- }
-
- return paused_call;
-}
-
-static int
-afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- int ret = 0;
-
- paused_call = afr_paused_call_create (frame);
- if (paused_call)
- list_add (&paused_call->call_list, &fd_ctx->paused_calls);
- else
- ret = -ENOMEM;
-
- return ret;
-}
+ ret = afr_local_init (local, this->private, &op_errno);
+ if (ret < 0)
+ goto out;
-static void
-afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- char *reason = NULL;
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
- local = frame->local;
sh = &local->self_heal;
- inode = local->fd->inode;
-
- sh->do_missing_entry_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_data_self_heal = _gf_true;
+ sh->do_metadata_self_heal = _gf_true;
+ if (fd->inode->ia_type == IA_IFREG)
+ sh->do_data_self_heal = _gf_true;
+ else if (fd->inode->ia_type == IA_IFDIR)
+ sh->do_entry_self_heal = _gf_true;
reason = "subvolume came online";
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
+ afr_launch_self_heal (frame, this, fd->inode, _gf_true,
+ fd->inode->ia_type, reason, NULL, NULL);
+ return;
+out:
+ AFR_STACK_DESTROY (frame);
}
-int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
-{
- int ret = 0;
- int i = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- gf_boolean_t need_self_heal = _gf_false;
- int *need_open = NULL;
- int need_open_count = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- gf_boolean_t fop_continue = _gf_true;
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this)
+{
+ int ret = 0;
+ int i = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t need_self_heal = _gf_false;
+ int *need_open = NULL;
+ size_t need_open_count = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
priv = this->private;
- GF_ASSERT (local->fd);
- if (pause_fop)
- GF_ASSERT (local->fop_call_continue);
-
- ret = afr_prepare_loc (frame, local->fd);
- if (ret < 0) {
- //File does not exist we cant open it.
- ret = 0;
+ if (!afr_is_fd_fixable (fd))
goto out;
- }
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- ret = -EINVAL;
- goto unlock;
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
- LOCK (&local->fd->lock);
+ LOCK (&fd->lock);
{
if (fd_ctx->up_count < priv->up_count) {
need_self_heal = _gf_true;
@@ -380,62 +497,40 @@ afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
fd_ctx->down_count = priv->down_count;
}
+ need_open = alloca (priv->child_count * sizeof (*need_open));
for (i = 0; i < priv->child_count; i++) {
- if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) &&
- local->child_up[i]) {
- fd_ctx->opened_on[i] = AFR_FD_OPENING;
- if (!need_open)
- need_open = GF_CALLOC (priv->child_count,
- sizeof (*need_open),
- gf_afr_mt_int32_t);
- need_open[i] = 1;
- need_open_count++;
- } else if (pause_fop && local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING)) {
- local->fop_paused = _gf_true;
- }
- }
+ need_open[i] = 0;
+ if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED)
+ continue;
+
+ if (!priv->child_up[i])
+ continue;
- if (local->fop_paused) {
- GF_ASSERT (pause_fop);
- gf_log (this->name, GF_LOG_INFO, "Pause fd %p",
- local->fd);
- ret = afr_pause_fd_fop (frame, this, fd_ctx);
- if (ret)
- goto unlock;
- fop_continue = _gf_false;
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+
+ need_open[i] = 1;
+ need_open_count++;
}
}
-unlock:
- UNLOCK (&local->fd->lock);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s",
- local->loc.path);
- fop_continue = _gf_false;
+ UNLOCK (&fd->lock);
+ if (ret)
goto out;
- }
if (need_self_heal)
- afr_trigger_open_fd_self_heal (frame, this);
+ afr_trigger_open_fd_self_heal (fd, this);
if (!need_open_count)
goto out;
- gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd);
- afr_fix_open (frame, this, fd_ctx, need_open_count, need_open);
- fop_continue = _gf_false;
+ afr_fix_open (this, fd, need_open_count, need_open);
out:
- if (need_open)
- GF_FREE (need_open);
- if (fop_continue && local->fop_call_continue)
- local->fop_call_continue (frame, this);
- return ret;
+ return;
}
int
afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -448,9 +543,14 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
QUORUM_CHECK(writev,out);
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -460,21 +560,24 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->cont.writev.vector = iov_dup (vector, count);
local->cont.writev.count = count;
local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
local->cont.writev.iobref = iobref_ref (iobref);
local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_writev;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_writev (frame, this);
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -503,8 +606,9 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -514,17 +618,14 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
int
afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int child_index = (long) cookie;
int read_child = 0;
int call_count = -1;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
@@ -534,38 +635,22 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
}
- local->op_errno = op_errno;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
local->transaction.resume (frame, this);
}
@@ -593,6 +678,7 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->stable_write = _gf_true;
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
@@ -601,7 +687,8 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->truncate,
&local->loc,
- local->cont.truncate.offset);
+ local->cont.truncate.offset,
+ NULL);
if (!--call_count)
break;
@@ -629,7 +716,7 @@ afr_truncate_done (call_frame_t *frame, xlator_t *this)
int
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -651,7 +738,7 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -670,14 +757,18 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
local->transaction.start = offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -708,8 +799,9 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
}
@@ -718,17 +810,14 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
int
afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int child_index = (long) cookie;
int call_count = -1;
- int need_unwind = 0;
int read_child = 0;
local = frame->local;
- priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
@@ -738,38 +827,22 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
}
- local->op_errno = op_errno;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
local->transaction.resume (frame, this);
}
@@ -797,6 +870,7 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->stable_write = _gf_true;
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
@@ -804,7 +878,9 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset);
+ local->fd,
+ local->cont.ftruncate.offset,
+ NULL);
if (!--call_count)
break;
@@ -859,14 +935,19 @@ afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
local->transaction.start = local->cont.ftruncate.offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
}
return 0;
@@ -875,7 +956,7 @@ out:
int
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -889,9 +970,13 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
QUORUM_CHECK(ftruncate,out);
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -901,20 +986,17 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
local->cont.ftruncate.offset = offset;
local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_ftruncate;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_open_fd_fix (fd, this);
+
+ afr_do_ftruncate (frame, this);
ret = 0;
out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -943,8 +1025,9 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -954,7 +1037,7 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
int
afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -974,29 +1057,14 @@ afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
}
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1042,7 +1110,8 @@ afr_setattr_wind (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->setattr,
&local->loc,
&local->cont.setattr.in_buf,
- local->cont.setattr.valid);
+ local->cont.setattr.valid,
+ NULL);
if (!--call_count)
break;
@@ -1070,7 +1139,7 @@ afr_setattr_done (call_frame_t *frame, xlator_t *this)
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid)
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -1092,7 +1161,7 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1112,14 +1181,18 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -1146,8 +1219,9 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -1157,7 +1231,7 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
int
afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -1177,29 +1251,14 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
-
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
}
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1245,7 +1304,8 @@ afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->fsetattr,
local->fd,
&local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid);
+ local->cont.fsetattr.valid,
+ NULL);
if (!--call_count)
break;
@@ -1272,7 +1332,7 @@ afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid)
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -1286,6 +1346,11 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
QUORUM_CHECK(fsetattr,out);
transaction_frame = copy_frame (frame);
@@ -1294,7 +1359,7 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1310,24 +1375,24 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
local->fd = fd_ref (fd);
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_open_fd_fix (fd, this);
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -1355,38 +1420,34 @@ afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ local->op_ret, local->op_errno,
+ NULL);
+ }
return 0;
}
int
afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1406,10 +1467,10 @@ afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
local = frame->local;
priv = this->private;
@@ -1432,7 +1493,8 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->setxattr,
&local->loc,
local->cont.setxattr.dict,
- local->cont.setxattr.flags);
+ local->cont.setxattr.flags,
+ NULL);
if (!--call_count)
break;
@@ -1446,7 +1508,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
int
afr_setxattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = frame->local;
local->transaction.unwind (frame, this);
@@ -1457,16 +1519,23 @@ afr_setxattr_done (call_frame_t *frame, xlator_t *this)
int
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ int ret = -1;
+ int op_errno = EINVAL;
- VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
+
+ VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this->private, out);
priv = this->private;
@@ -1478,7 +1547,7 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1498,14 +1567,211 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* {{{ fsetxattr */
+
+
+int
+afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fsetxattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
+
+
+int
+afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsetxattr,
+ local->fd,
+ local->cont.fsetxattr.dict,
+ local->cont.fsetxattr.flags,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int
+afr_fsetxattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+int
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
+ QUORUM_CHECK(fsetxattr,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op_ret = -1;
+
+ local->cont.fsetxattr.dict = dict_ref (dict);
+ local->cont.fsetxattr.flags = flags;
+
+ local->transaction.fop = afr_fsetxattr_wind;
+ local->transaction.done = afr_fsetxattr_done;
+ local->transaction.unwind = afr_fsetxattr_unwind;
+
+ local->fd = fd_ref (fd);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno);
+ AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
}
return 0;
@@ -1513,6 +1779,7 @@ out:
/* }}} */
+
/* {{{ removexattr */
@@ -1534,38 +1801,34 @@ afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ local->op_ret, local->op_errno,
+ NULL);
+ }
return 0;
}
int
afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1610,7 +1873,8 @@ afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->removexattr,
&local->loc,
- local->cont.removexattr.name);
+ local->cont.removexattr.name,
+ NULL);
if (!--call_count)
break;
@@ -1636,16 +1900,23 @@ afr_removexattr_done (call_frame_t *frame, xlator_t *this)
int
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
+
+ VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this->private, out);
VALIDATE_OR_GOTO (loc, out);
@@ -1659,7 +1930,7 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (transaction_frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
local = transaction_frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1678,15 +1949,913 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, -1, op_errno);
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* ffremovexattr */
+int
+afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fremovexattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
+
+
+int
+afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
}
return 0;
}
+
+
+int32_t
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fremovexattr,
+ local->fd,
+ local->cont.removexattr.name,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int
+afr_fremovexattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
+ QUORUM_CHECK(fremovexattr, out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op_ret = -1;
+
+ local->cont.removexattr.name = gf_strdup (name);
+
+ local->transaction.fop = afr_fremovexattr_wind;
+ local->transaction.done = afr_fremovexattr_done;
+ local->transaction.unwind = afr_fremovexattr_unwind;
+
+ local->fd = fd_ref (fd);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fallocate,
+ local->fd,
+ local->cont.fallocate.mode,
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_fallocate (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_FALLOCATE;
+
+ local->transaction.fop = afr_fallocate_wind;
+ local->transaction.done = afr_fallocate_done;
+ local->transaction.unwind = afr_fallocate_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0;
+
+ /* fallocate can modify the file size */
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(fallocate,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_fallocate (frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ discard */
+
+static int
+afr_discard_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_discard_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->discard,
+ local->fd,
+ local->cont.discard.offset,
+ local->cont.discard.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_discard_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_discard (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_DISCARD;
+
+ local->transaction.fop = afr_discard_wind;
+ local->transaction.done = afr_discard_done;
+ local->transaction.unwind = afr_discard_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(discard, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_discard(frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ }
+
+ return 0;
+}
+
+
+/* {{{ zerofill */
+
+static int
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.zerofill.prebuf,
+ &local->cont.zerofill.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ if (afr_fop_failed (op_ret, op_errno)) {
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
+
+ if (child_index == read_child) {
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind) {
+ local->transaction.unwind (frame, this);
+ }
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->zerofill,
+ local->fd,
+ local->cont.zerofill.offset,
+ local->cont.zerofill.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_zerofill_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_zerofill(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_ZEROFILL;
+
+ local->transaction.fop = afr_zerofill_wind;
+ local->transaction.done = afr_zerofill_done;
+ local->transaction.unwind = afr_zerofill_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this,
+ AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(zerofill, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ goto out;
+ }
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_zerofill(frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index f9aa7bd36..8e93ca44a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_WRITE_H__
@@ -22,51 +13,70 @@
int32_t
afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, dict_t *xdata);
int32_t
afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid);
+ loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
int
afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid);
+ fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
int32_t
afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode);
+ fd_t *fd, mode_t mode, dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref);
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset);
+ loc_t *loc, off_t offset, dict_t *xdata);
int32_t
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset);
+ fd_t *fd, off_t offset, dict_t *xdata);
int32_t
afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2]);
+ loc_t *loc, struct timespec tv[2], dict_t *xdata);
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid);
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid);
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
int32_t
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags);
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
int32_t
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 41fa8a30b..060d78f35 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
@@ -31,8 +22,69 @@
#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
#define LOCKED_LOWER 0x2 /* for lower path */
+#define AFR_TRACE_INODELK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_out (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_out (frame, this, params); \
+ } while (0);
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+ const afr_entry_lockee_t *r1 = l1;
+ const afr_entry_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid ((loc_t*)&r1->loc, gfid1);
+ loc_gfid ((loc_t*)&r2->loc, gfid2);
+ ret = uuid_compare (gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp (r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
static uint64_t afr_lock_number = 1;
@@ -57,12 +109,13 @@ afr_set_lock_number (call_frame_t *frame, xlator_t *this)
}
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this)
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
{
gf_log (this->name, GF_LOG_TRACE,
"Setting lk-owner=%llu",
- (unsigned long long) (unsigned long)frame->root);
- frame->root->lk_owner = (uint64_t) (unsigned long)frame->root;
+ (unsigned long long) (unsigned long)lk_owner);
+
+ set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
}
static int
@@ -98,16 +151,9 @@ internal_lock_count (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- if (local->fd) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i])
- ++call_count;
- }
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
}
return call_count;
@@ -115,7 +161,7 @@ internal_lock_count (call_frame_t *frame, xlator_t *this)
static void
afr_print_inodelk (char *str, int size, int cmd,
- struct gf_flock *flock, uint64_t owner)
+ struct gf_flock *flock, gf_lkowner_t *owner)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -163,11 +209,11 @@ afr_print_inodelk (char *str, int size, int cmd,
}
snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
cmd_str, type_str, (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner);
+ lkowner_utoa (owner));
}
@@ -183,11 +229,11 @@ afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
void
afr_print_entrylk (char *str, int size, const char *basename,
- uint64_t owner)
+ gf_lkowner_t *owner)
{
- snprintf (str, size, "Basename=%s, lk-owner=%llu",
+ snprintf (str, size, "Basename=%s, lk-owner=%s",
basename ? basename : "<nul>",
- (unsigned long long)owner);
+ lkowner_utoa (owner));
}
static void
@@ -241,27 +287,20 @@ afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int op_ret, int op_errno, int32_t child_index)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->inodelk_trace) {
- return;
- }
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
@@ -270,39 +309,31 @@ afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict,
- lockee,
+ verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
(unsigned long long) int_lock->lock_number);
}
static void
-afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int32_t cmd, int32_t child_index)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
- if (!priv->inodelk_trace) {
- return;
- }
-
- afr_print_inodelk (lock, 256, cmd, flock, frame->root->lk_owner);
+ afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
@@ -317,20 +348,21 @@ afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t child_index)
+ int32_t cookie)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
+ int child_index = 0;
+ int lockee_no = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -338,36 +370,41 @@ afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_entrylk (lock, 256, basename, frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
static void
-afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename, int op_ret,
- int op_errno, int32_t child_index)
+afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int op_ret, int op_errno, int32_t cookie)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int lockee_no = 0;
+ int child_index = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -375,20 +412,25 @@ afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
verdict,
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
@@ -441,6 +483,47 @@ is_afr_lock_transaction (afr_local_t *local)
return ret;
}
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count)
+{
+ int ret = -1;
+
+ loc_copy (&lockee->loc, loc);
+ lockee->basename = (basename)? gf_strdup (basename): NULL;
+ if (basename && !lockee->basename)
+ goto out;
+
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC (child_count,
+ sizeof (*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ loc_wipe (&int_lock->lockee[i].loc);
+ if (int_lock->lockee[i].basename)
+ GF_FREE (int_lock->lockee[i].basename);
+ if (int_lock->lockee[i].locked_nodes)
+ GF_FREE (int_lock->lockee[i].locked_nodes);
+ }
+
+ return;
+}
+
static int
initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
{
@@ -458,8 +541,13 @@ initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
int_lock->lock_op_ret = -1;
int_lock->lock_op_errno = 0;
- for (i = 0; i < priv->child_count; i++) {
- int_lock->entry_locked_nodes[i] = 0;
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset (int_lock->lockee[i].locked_nodes, 0,
+ sizeof (*int_lock->lockee[i].locked_nodes) *
+ priv->child_count);
}
return 0;
@@ -471,19 +559,23 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
- int i = 0;
+ afr_inodelk_t *inodelk = NULL;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ inodelk->lock_count = 0;
+ int_lock->lk_attempted_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ memset (int_lock->locked_nodes, 0,
+ sizeof (*int_lock->locked_nodes) * priv->child_count);
return 0;
}
@@ -493,7 +585,7 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
{
int ret = 0;
- ret = strcmp (l1->path, l2->path);
+ ret = uuid_compare (l1->inode->gfid, l2->inode->gfid);
if (ret == 0)
ret = strcmp (b1, b2);
@@ -505,6 +597,18 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
}
int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
+{
+ int call_count = 0;
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
+
+ return call_count;
+}
+
+int
afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
{
@@ -522,7 +626,7 @@ afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
/* FIXME: What if UNLOCK fails */
static int32_t
afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
@@ -548,33 +652,37 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
local = frame->local;
int_lock = &local->internal_lock;
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, NULL, op_ret,
op_errno, child_index);
+ priv = this->private;
+
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unlock failed on %d, reason: %s",
- local->loc.path, child_index, strerror (op_errno));
+ gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s "
+ "with lock owner %s", local->loc.path,
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner));
}
- int_lock->inode_locked_nodes[child_index] &= LOCKED_NO;
-
- if (op_ret == 1) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.eager_lock)
local->transaction.eager_lock[child_index] = 0;
- }
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
@@ -584,9 +692,12 @@ static int
afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
int call_count = 0;
int i = 0;
int piggyback = 0;
@@ -597,15 +708,14 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = F_UNLCK;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- gf_log (this->name, GF_LOG_DEBUG, "attempting data unlock range %"PRIu64
- " %"PRIu64" by %"PRIu64, flock.l_start, flock.l_len,
- frame->root->lk_owner);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
+ full_flock.l_type = F_UNLCK;
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
priv->child_count);
int_lock->lk_call_count = call_count;
@@ -621,11 +731,11 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
fd_ctx = afr_fd_ctx_get (local->fd, this);
for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->inode_locked_nodes[i] & LOCKED_YES)
- != LOCKED_YES)
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
continue;
if (local->fd) {
+ flock_use = &flock;
if (!local->transaction.eager_lock[i]) {
goto wind;
}
@@ -637,43 +747,48 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
if (fd_ctx->lock_piggyback[i]) {
fd_ctx->lock_piggyback[i]--;
piggyback = 1;
+ } else {
+ fd_ctx->lock_acquired[i]--;
}
}
UNLOCK (&local->fd->lock);
if (piggyback) {
afr_unlock_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0);
+ this, 1, 0, NULL);
if (!--call_count)
break;
continue;
}
- fd_ctx->lock_acquired[i]--;
+ flock_use = &full_flock;
wind:
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, flock_use, F_SETLK,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
if (!--call_count)
break;
} else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, &flock, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
if (!--call_count)
break;
@@ -685,24 +800,34 @@ out:
static int32_t
afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
- int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ lockee_no = (int)((long) cookie) / priv->child_count;
+ child_index = (int) ((long) cookie) % priv->child_count;
local = frame->local;
+ int_lock = &local->internal_lock;
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (int) ((long)cookie));
- if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ if (op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"%s: unlock failed on %d, reason: %s",
local->loc.path, child_index, strerror (op_errno));
}
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+ int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
return 0;
}
@@ -710,24 +835,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int
afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int copies = 0;
+ int i = -1;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
+ call_count = afr_lockee_locked_nodes_count (int_lock);
- call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes,
- priv->child_count);
int_lock->lk_call_count = call_count;
if (!call_count){
@@ -737,18 +860,23 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->entry_locked_nodes[i] & LOCKED_YES) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_no = i / copies;
+ index = i % copies;
+ if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
if (!--call_count)
break;
@@ -762,15 +890,22 @@ out:
static int32_t
afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int child_index = (long) cookie;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long) cookie;
+ int child_index = 0;
+ int lockee_no = 0;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
+ child_index = ((int)cky) % priv->child_count;
+ lockee_no = ((int)cky) / priv->child_count;
+
LOCK (&frame->lock);
{
if (op_ret == -1) {
@@ -786,6 +921,8 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
int_lock->lock_op_errno = op_errno;
}
+
+ int_lock->lk_attempted_count++;
}
UNLOCK (&frame->lock);
@@ -794,10 +931,17 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
afr_unlock (frame, this);
} else {
if (op_ret == 0) {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ } else {
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lock_count++;
+ }
}
- afr_lock_blocking (frame, this, child_index + 1);
+ afr_lock_blocking (frame, this, cky + 1);
}
return 0;
@@ -805,98 +949,26 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
static int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
-
- local->op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (op_ret != 0) {
- afr_unlock (frame, this);
- goto out;
- } else {
- int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER;
- int_lock->lock_count++;
- }
-
- /* The lower path has been locked. Now lock the higher path */
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, higher_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
-out:
- return 0;
-}
-
-static int32_t
afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long)cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
@@ -904,6 +976,7 @@ static int
afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -914,18 +987,16 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
break;
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
- memcpy (int_lock->entry_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->entrylk_lock_count = int_lock->lock_count;
+ /*entrylk_count is being used in both non-blocking and blocking
+ * modes */
break;
}
@@ -933,25 +1004,67 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
}
+static inline gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+ afr_transaction_type trans_type)
+{
+ gf_boolean_t is_entrylk = _gf_false;
+
+ if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
+ int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) {
+
+ is_entrylk = _gf_true;
+
+ } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
+ (trans_type == AFR_ENTRY_TRANSACTION ||
+ trans_type == AFR_ENTRY_RENAME_TRANSACTION)) {
+
+ is_entrylk = _gf_true;
+
+ } else {
+ is_entrylk = _gf_false;
+ }
+
+ return is_entrylk;
+}
+
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+ if (!local->child_up[child_index])
+ return _gf_false;
+
+ return _gf_true;
+}
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
struct gf_flock flock = {0,};
uint64_t ctx = 0;
int ret = 0;
+ int child_index = 0;
+ int lockee_no = 0;
+ gf_boolean_t is_entrylk = _gf_false;
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
if (local->fd) {
ret = fd_ctx_get (local->fd, this, &ctx);
@@ -970,42 +1083,26 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index] ||
- !local->fd_open_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
}
- if ((child_index == priv->child_count) &&
- int_lock->lock_count == 0) {
-
- gf_log (this->name, GF_LOG_INFO,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
+ (!is_entrylk && int_lock->lock_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to lock on even one child");
- afr_copy_locked_nodes (frame, this);
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- afr_unlock(frame, this);
+ afr_copy_locked_nodes (frame, this);
- return 0;
+ afr_unlock(frame, this);
+ return 0;
+ }
}
- if ((child_index == priv->child_count)
- || (int_lock->lock_count == int_lock->lk_expected_count)) {
-
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
/* we're done locking */
gf_log (this->name, GF_LOG_DEBUG,
@@ -1018,12 +1115,18 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
if (local->fd) {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1031,11 +1134,12 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock);
+ int_lock->domain, local->fd,
+ F_SETLKW, &flock, NULL);
} else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1043,63 +1147,44 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLKW, &flock, NULL);
}
break;
case AFR_ENTRY_RENAME_TRANSACTION:
- {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, lower_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- break;
- }
-
case AFR_ENTRY_TRANSACTION:
+ /*Accounting for child_index increments on 'down'
+ *and 'fd-less' children */
+
if (local->fd) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ cookie);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
} else {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_IN (frame, this,
+ AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, local->transaction.basename,
child_index);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
}
break;
@@ -1127,11 +1212,12 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
break;
case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
up_count = afr_up_children_count (local->child_up,
priv->child_count);
- int_lock->lk_expected_count = 2 * up_count;
- //fallthrough
- case AFR_ENTRY_TRANSACTION:
+ int_lock->lk_call_count = int_lock->lk_expected_count
+ = (int_lock->lockee_count *
+ up_count);
initialize_entrylk_variables (frame, this);
break;
}
@@ -1143,42 +1229,55 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
static int32_t
afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
int call_count = 0;
int child_index = (long) cookie;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ copies = priv->child_count;
+ index = child_index % copies;
+ lockee_no = child_index / copies;
local = frame->local;
int_lock = &local->internal_lock;
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
op_errno, (long) cookie);
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0 ) {
+ if (op_errno == ENOSYS) {
/* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_no].locked_nodes[index] |= \
+ LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ }
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index] |= LOCKED_YES;
- int_lock->entrylk_lock_count++;
+ call_count = --int_lock->lk_call_count;
}
+ UNLOCK (&frame->lock);
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
@@ -1205,42 +1304,26 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-void
-afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx,
- size_t child_count)
-{
- int i = 0;
-
- GF_ASSERT (local->fd_open_on);
-
- memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count);
- for (i = 0; i < child_count; i++)
- if (fd_ctx->opened_on[i] == AFR_FD_OPENED)
- local->fd_open_on[i] = 1;
-}
-
int
afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int32_t call_count = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
int i = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
initialize_entrylk_variables (frame, this);
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
if (local->fd) {
fd_ctx = afr_fd_ctx_get (local->fd, this);
if (!fd_ctx) {
@@ -1253,11 +1336,11 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
return -1;
}
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
- call_count = internal_lock_count (frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
@@ -1270,42 +1353,52 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking entrylk calls only on up children
and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
this->name, local->fd,
- basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+ if (!--call_count)
+ break;
}
}
} else {
- GF_ASSERT (loc);
-
- call_count = internal_lock_count (frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name, loc, basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
if (!--call_count)
break;
-
}
}
}
@@ -1315,70 +1408,69 @@ out:
int32_t
afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
int call_count = 0;
int child_index = (long) cookie;
afr_fd_ctx_t *fd_ctx = NULL;
- afr_private_t *priv = NULL;
- priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- afr_trace_inodelk_out (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
LOCK (&frame->lock);
{
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on "
+ "server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+ } else {
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
+
+ if (local->transaction.eager_lock &&
+ local->transaction.eager_lock[child_index] &&
+ local->fd) {
+ /* piggybacked */
+ if (op_ret == 1) {
+ /* piggybacked */
+ } else if (op_ret == 0) {
+ /* lock acquired from server */
+ fd_ctx->lock_acquired[child_index]++;
+ }
+ }
+ }
+
call_count = --int_lock->lk_call_count;
}
UNLOCK (&frame->lock);
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
-
- if (priv->eager_lock && local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- local->transaction.eager_lock[child_index] = 1;
- /* piggybacked */
-
- if (op_ret == 1) {
- /* piggybacked */
- } else if (op_ret == 0) {
- /* lock acquired from server */
- LOCK (&local->fd->lock);
- {
- fd_ctx->lock_acquired[child_index]++;
- }
- UNLOCK (&local->fd->lock);
- }
- }
- }
-
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last inode locking reply received");
/* all locks successful. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- int_lock->lk_expected_count) {
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
@@ -1402,30 +1494,29 @@ int
afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
- int32_t call_count = 0;
- int i = 0;
- int ret = 0;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = &flock;
- int piggyback = 0;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int piggyback = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- gf_log (this->name, GF_LOG_DEBUG, "attempting data lock range %"PRIu64
- " %"PRIu64" by %"PRIu64, flock.l_start, flock.l_len,
- frame->root->lk_owner);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
- full_flock.l_type = int_lock->lk_flock.l_type;
+ full_flock.l_type = inodelk->flock.l_type;
initialize_inodelk_variables (frame, this);
@@ -1441,11 +1532,11 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
ret = -1;
goto out;
}
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
@@ -1460,14 +1551,18 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking inodelk calls only on up children
and where the fd has been opened */
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i] || !local->fd_open_on[i])
+ if (!local->child_up[i])
continue;
- if (!priv->eager_lock)
+ flock_use = &flock;
+ if (!local->transaction.eager_lock_on) {
goto wind;
+ }
- flock_use = &full_flock;
piggyback = 0;
+ local->transaction.eager_lock[i] = 1;
+
+ afr_set_delayed_post_op (frame, this);
LOCK (&local->fd->lock);
{
@@ -1481,21 +1576,23 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
if (piggyback) {
/* (op_ret == 1) => indicate piggybacked lock */
afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0);
+ this, 1, 0, NULL);
if (!--call_count)
break;
continue;
}
+ flock_use = &full_flock;
wind:
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, flock_use, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, flock_use);
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
if (!--call_count)
break;
@@ -1508,15 +1605,16 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (!local->child_up[i])
continue;
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
if (!--call_count)
break;
@@ -1526,200 +1624,6 @@ out:
return ret;
}
-static int
-__is_lower_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER)
- count++;
- }
-
- return count;
-
-}
-
-static int
-__is_higher_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->locked_nodes[i] & LOCKED_YES)
- count++;
- }
-
- return count;
-
-}
-
-static int
-afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = __is_lower_locked (frame, this);
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- if (!--call_count)
- break;
-
- }
- }
-
-out:
- return 0;
-
-}
-
-
-static int
-afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.done (frame, this);
- return 0;
-}
-
-static int
-afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_higher_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking higher");
- int_lock->lk_basename = higher_name;
- int_lock->lk_loc = higher;
- int_lock->lock_cbk = afr_post_unlock_higher_cbk;
-
- afr_unlock_entrylk (frame, this);
- } else
- local->transaction.done (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_lower_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking lower");
- int_lock->lk_basename = lower_name;
- int_lock->lk_loc = lower;
- int_lock->lock_cbk = afr_post_unlock_lower_cbk;
-
- afr_unlock_lower_entrylk (frame, this);
- } else
- afr_post_unlock_lower_cbk (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_transaction (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- return (local->transaction.type ==
- AFR_ENTRY_RENAME_TRANSACTION);
-
-}
-
int32_t
afr_unlock (call_frame_t *frame, xlator_t *this)
{
@@ -1731,10 +1635,8 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
if (is_afr_lock_transaction (local))
afr_unlock_inodelk (frame, this);
else
- if (!afr_rename_transaction (frame, this))
- afr_unlock_entrylk (frame, this);
- else
- afr_rename_unlock (frame, this);
+ afr_unlock_entrylk (frame, this);
+
} else {
if (is_afr_lock_selfheal (local))
afr_unlock_inodelk (frame, this);
@@ -1903,10 +1805,12 @@ out:
int32_t
afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock);
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
int32_t
afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -1930,7 +1834,7 @@ afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(void *) (long) source_child,
priv->children[source_child],
priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
+ local->fd, F_GETLK_FD, &flock, NULL);
return 0;
@@ -1958,7 +1862,7 @@ afr_recover_lock (call_frame_t *frame, xlator_t *this,
(void *) (long) lock_recovery_child,
priv->children[lock_recovery_child],
priv->children[lock_recovery_child]->fops->lk,
- local->fd, F_SETLK, flock);
+ local->fd, F_SETLK, flock, NULL);
return 0;
}
@@ -1976,7 +1880,8 @@ is_afr_lock_eol (struct gf_flock *lock)
int32_t
afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
if (op_ret) {
gf_log (this->name, GF_LOG_INFO,
@@ -2036,7 +1941,7 @@ afr_lock_recovery (call_frame_t *frame, xlator_t *this)
(void *) (long) source_child,
priv->children[source_child],
priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
+ local->fd, F_GETLK_FD, &flock, NULL);
out:
return ret;
@@ -2064,7 +1969,8 @@ out:
int32_t
afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
int32_t child_index = (long )cookie;
int ret = 0;
@@ -2136,8 +2042,7 @@ afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
(void *)(long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->open,
- &loc, fdctx->flags, local->fd,
- fdctx->wbflags);
+ &loc, fdctx->flags, local->fd, NULL);
return 0;
}
@@ -2186,7 +2091,7 @@ afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
goto out;
}
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
if (ret < 0) {
@@ -2232,27 +2137,38 @@ out:
return ret;
}
-void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
unsigned int child_count)
{
- afr_local_t *dst_local = NULL;
- afr_local_t *src_local = NULL;
- afr_internal_lock_t *dst_lock = NULL;
- afr_internal_lock_t *src_lock = NULL;
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
- dst_local = dst->local;
- dst_lock = &dst_local->internal_lock;
src_local = src->local;
src_lock = &src_local->internal_lock;
- if (src_lock->inode_locked_nodes) {
- memcpy (dst_lock->inode_locked_nodes,
- src_lock->inode_locked_nodes,
- sizeof (*dst_lock->inode_locked_nodes) * child_count);
- memset (src_lock->inode_locked_nodes, 0,
- sizeof (*src_lock->inode_locked_nodes) * child_count);
- }
-
- dst_lock->inodelk_lock_count = src_lock->inodelk_lock_count;
- src_lock->inodelk_lock_count = 0;
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
+ goto out;
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
+ }
+
+ dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
+ dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index ebe189c35..73594f265 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,7 +17,6 @@
enum gf_afr_mem_types_ {
gf_afr_mt_iovec = gf_common_mt_end + 1,
gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_local_t,
gf_afr_mt_afr_private_t,
gf_afr_mt_int32_t,
gf_afr_mt_char,
@@ -44,8 +34,17 @@ enum gf_afr_mem_types_ {
gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
gf_afr_fd_paused_call_t,
- gf_afr_mt_afr_crawl_data_t,
- gf_afr_mt_afr_brick_pos_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_stats_t,
+ gf_afr_mt_shd_crawl_event_t,
+ gf_afr_mt_uint64_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 813b3c451..643a5d692 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -123,7 +114,7 @@ out:
int
afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = frame->local;
afr_private_t *priv = NULL;
@@ -132,7 +123,7 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (afr_open_only_data_self_heal (priv->data_self_heal))
afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
+ local->fd, xdata);
return 0;
}
@@ -140,7 +131,7 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_open_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_local_t * local = NULL;
int ret = 0;
@@ -162,8 +153,7 @@ afr_open_cbk (call_frame_t *frame, void *cookie,
local->success_count++;
ret = afr_child_fd_ctx_set (this, fd, child_index,
- local->cont.open.flags,
- local->cont.open.wbflags);
+ local->cont.open.flags);
if (ret) {
local->op_ret = -1;
local->op_errno = -ret;
@@ -181,12 +171,12 @@ unlock:
&& (local->op_ret >= 0)) {
STACK_WIND (frame, afr_open_ftruncate_cbk,
this, this->fops->ftruncate,
- fd, 0);
+ fd, 0, NULL);
} else {
if (afr_open_only_data_self_heal (priv->data_self_heal))
afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, xdata);
}
}
@@ -195,7 +185,7 @@ unlock:
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -225,7 +215,7 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto out;
}
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -236,7 +226,6 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
loc_copy (&local->loc, loc);
local->cont.open.flags = flags;
- local->cont.open.wbflags = wbflags;
local->fd = fd_ref (fd);
@@ -245,7 +234,7 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->open,
- loc, wind_flags, fd, wbflags);
+ loc, wind_flags, fd, xdata);
if (!--call_count)
break;
@@ -255,75 +244,35 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (open, frame, -1, op_errno, fd);
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, xdata);
return 0;
}
-//NOTE: this function should be called with holding the lock on
-//fd to which fd_ctx belongs
-void
-afr_get_resumable_calls (xlator_t *this, afr_fd_ctx_t *fd_ctx,
- struct list_head *list)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- gf_boolean_t call = _gf_false;
-
- priv = this->private;
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- call = _gf_true;
- call_local = paused_call->frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (call_local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING))
- call = _gf_false;
- }
-
- if (call) {
- list_del_init (&paused_call->call_list);
- list_add (&paused_call->call_list, list);
- }
- }
-}
-
-void
-afr_resume_calls (xlator_t *this, struct list_head *list)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
-
- list_for_each_entry_safe (paused_call, tmp, list, call_list) {
- list_del_init (&paused_call->call_list);
- call_local = paused_call->frame->local;
- call_local->fop_call_continue (paused_call->frame, this);
- GF_FREE (paused_call);
- }
-}
-
int
afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- struct list_head paused_calls = {0};
- gf_boolean_t fop_paused = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
priv = this->private;
local = frame->local;
- call_count = afr_frame_return (frame);
+ if (op_ret >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open %s "
+ "on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ }
- //Note: No frame locking needed for this block of code
fd_ctx = afr_fd_ctx_get (local->fd, this);
if (!fd_ctx) {
gf_log (this->name, GF_LOG_WARNING,
@@ -331,90 +280,73 @@ afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- fop_paused = local->fop_paused;
LOCK (&local->fd->lock);
{
if (op_ret >= 0) {
fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- gf_log (this->name, GF_LOG_INFO, "fd for %s opened "
- "successfully on subvolume %s", local->loc.path,
- priv->children[child_index]->name);
} else {
- //Change open status from OPENING to NOT OPENED.
fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
- if (call_count == 0) {
- INIT_LIST_HEAD (&paused_calls);
- afr_get_resumable_calls (this, fd_ctx, &paused_calls);
- }
}
UNLOCK (&local->fd->lock);
out:
- if (call_count == 0) {
- afr_resume_calls (this, &paused_calls);
- //If the fop is paused then resume_calls will continue the fop
- if (fop_paused)
- goto done;
-
- if (local->fop_call_continue)
- local->fop_call_continue (frame, this);
- else
- AFR_STACK_DESTROY (frame);
- }
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
-done:
return 0;
}
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open)
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- call_frame_t *open_frame = NULL;
- afr_local_t *open_local = NULL;
- int ret = -1;
- ia_type_t ia_type = IA_INVAL;
- int32_t op_errno = 0;
-
- GF_ASSERT (fd_ctx);
- GF_ASSERT (need_open_count > 0);
- GF_ASSERT (need_open);
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- local = frame->local;
priv = this->private;
- if (!local->fop_call_continue) {
- open_frame = copy_frame (frame);
- if (!open_frame) {
- ret = -ENOMEM;
- goto out;
- }
- ALLOC_OR_GOTO (open_frame->local, afr_local_t, out);
- open_local = open_frame->local;
- ret = afr_local_init (open_local, priv, &op_errno);
- if (ret < 0)
- goto out;
- loc_copy (&open_local->loc, &local->loc);
- open_local->fd = fd_ref (local->fd);
- } else {
- ret = 0;
- open_frame = frame;
- open_local = local;
+
+ if (!afr_is_fd_fixable (fd) || !need_open || !need_open_count)
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
}
- open_local->call_count = need_open_count;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref (fd);
+ local->call_count = need_open_count;
- gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
+ gf_log (this->name, GF_LOG_DEBUG, "need open count: %zd",
need_open_count);
- ia_type = open_local->fd->inode->ia_type;
- GF_ASSERT (ia_type != IA_INVAL);
for (i = 0; i < priv->child_count; i++) {
if (!need_open[i])
continue;
- if (IA_IFDIR == ia_type) {
+
+ if (IA_IFDIR == fd->inode->ia_type) {
gf_log (this->name, GF_LOG_DEBUG,
"opening fd for dir %s on subvolume %s",
local->loc.path, priv->children[i]->name);
@@ -423,25 +355,28 @@ afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
(void*) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- &open_local->loc, open_local->fd);
+ &local->loc, local->fd,
+ NULL);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"opening fd for file %s on subvolume %s",
local->loc.path, priv->children[i]->name);
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
(void *)(long) i,
priv->children[i],
priv->children[i]->fops->open,
- &open_local->loc, fd_ctx->flags,
- open_local->fd, fd_ctx->wbflags);
+ &local->loc,
+ fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
}
+ op_errno = 0;
+ ret = 0;
out:
if (op_errno)
- ret = -op_errno;
- if (ret && open_frame)
- AFR_STACK_DESTROY (open_frame);
- return ret;
+ ret = -1; //For handling ALLOC_OR_GOTO
+ if (ret && frame)
+ AFR_STACK_DESTROY (frame);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index cb1516d84..83846f152 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -1,23 +1,15 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <openssl/md5.h>
#include "glusterfs.h"
#include "afr.h"
#include "xlator.h"
@@ -33,7 +25,6 @@
#include "compat-errno.h"
#include "compat.h"
#include "byte-order.h"
-#include "md5.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
@@ -72,8 +63,7 @@ sh_private_cleanup (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
sh_priv = sh->private;
- if (sh_priv)
- GF_FREE (sh_priv);
+ GF_FREE (sh_priv);
}
static int
@@ -110,10 +100,10 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
}
sh_private_cleanup (sh_frame, this);
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
GF_ASSERT (!last_loop_frame);
//loop_finish should have happened and the old_loop should be NULL
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"self-heal aborting on %s",
local->loc.path);
@@ -121,10 +111,10 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
} else {
GF_ASSERT (last_loop_frame);
if (diff_blocks == total_blocks) {
- gf_log (this->name, GF_LOG_INFO, "full self-heal "
+ gf_log (this->name, GF_LOG_DEBUG, "full self-heal "
"completed on %s",local->loc.path);
} else {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"diff self-heal on %s: completed. "
"(%d blocks of %d were different (%.2f%%))",
local->loc.path, diff_blocks, total_blocks,
@@ -153,7 +143,7 @@ sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
}
if (loop_sh && loop_sh->data_lock_held) {
- afr_sh_data_unlock (loop_frame, this,
+ afr_sh_data_unlock (loop_frame, this, this->name,
sh_destroy_frame);
} else {
sh_destroy_frame (loop_frame, this);
@@ -174,7 +164,7 @@ sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
sh_loop_finish (loop_sh->old_loop_frame, this);
loop_sh->old_loop_frame = NULL;
- gf_log (this->name, GF_LOG_DEBUG, "Aquired lock for range %"PRIu64
+ gf_log (this->name, GF_LOG_DEBUG, "Acquired lock for range %"PRIu64
" %"PRIu64, loop_sh->offset, loop_sh->block_size);
loop_sh->data_lock_held = _gf_true;
loop_sh->sh_data_algo_start (loop_frame, this);
@@ -224,7 +214,7 @@ sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
goto out;
//We want the frame to have same lk_owner as sh_frame
//so that locks translator allows conflicting locks
- new_loop_local = afr_local_copy (local, this);
+ new_loop_local = afr_self_heal_local_init (local, this);
if (!new_loop_local)
goto out;
new_loop_frame->local = new_loop_local;
@@ -239,7 +229,7 @@ sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
gf_afr_mt_char);
if (!new_loop_sh->write_needed)
goto out;
- new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LEN,
+ new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LENGTH,
gf_afr_mt_uint8_t);
if (!new_loop_sh->checksum)
goto out;
@@ -283,10 +273,10 @@ sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
new_loop_sh->offset = offset;
new_loop_sh->block_size = sh->block_size;
afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
- sh_loop_lock_success, sh_loop_lock_failure);
+ _gf_true, this->name, sh_loop_lock_success, sh_loop_lock_failure);
return 0;
out:
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
if (old_loop_frame)
sh_loop_finish (old_loop_frame, this);
sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
@@ -317,8 +307,9 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
sh_priv->loops_running--;
offset = sh_priv->offset;
block_size = sh->block_size;
- while ((!sh->eof_reached) && (0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
+ while ((!sh->eof_reached) &&
+ (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
+ (sh_priv->loops_running < priv->data_self_heal_window_size)
&& (sh_priv->offset < sh->file_size)) {
loop++;
@@ -337,7 +328,8 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
if (0 == loop) {
//loop finish does unlock, but the erasing of the pending
//xattrs needs to happen before that so do not finish the loop
- if (is_driver_done && !sh->op_failed)
+ if (is_driver_done &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
goto driver_done;
if (old_loop_frame) {
sh_loop_finish (old_loop_frame, this);
@@ -348,7 +340,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
//If we have more loops to form we should finish previous loop after
//the next loop lock
while (loop--) {
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
// op failed in other loop, stop spawning more loops
if (old_loop_frame) {
sh_loop_finish (old_loop_frame, this);
@@ -394,7 +386,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame
}
if (op_ret == -1) {
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
if (loop_frame) {
sh_loop_finish (loop_frame, this);
@@ -410,7 +402,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame
static int
sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * loop_local = NULL;
@@ -442,13 +434,22 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
priv->children[child_index]->name,
strerror (op_errno));
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (loop_sh, op_errno);
+ } else if (op_ret < loop_local->cont.writev.vector->iov_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "incomplete write to %s on subvolume %s "
+ "(expected %lu, returned %d)", sh_local->loc.path,
+ priv->children[child_index]->name,
+ loop_local->cont.writev.vector->iov_len, op_ret);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
+ iobref_unref(loop_local->cont.writev.iobref);
+
sh_loop_return (sh_frame, this, loop_frame,
loop_sh->op_ret, loop_sh->op_errno);
}
@@ -456,12 +457,41 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
return 0;
}
+static void
+sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame,
+ afr_private_t *priv)
+{
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ if (!strcmp (sh->algo->name, "diff"))
+ return;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ /* full self-heal guarantees there exists atleast 1 file with size 0
+ * That means for other files we can preserve holes that come after
+ * its size before 'trim'
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->write_needed[i] &&
+ ((loop_sh->offset + 1) > sh->buf[i].ia_size))
+ loop_sh->write_needed[i] = 0;
+ }
+}
static int
sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * loop_local = NULL;
@@ -486,7 +516,7 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
if (op_ret <= 0) {
if (op_ret < 0) {
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
"for %s reason :%s", sh->source,
sh_local->loc.path, strerror (errno));
@@ -499,18 +529,26 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
goto out;
}
- if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0) {
- gf_log (this->name, GF_LOG_DEBUG, "0 filled block");
- sh_loop_return (sh_frame, this, loop_frame,
- op_ret, op_errno);
- goto out;
- }
+ if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0)
+ sh_prune_writes_needed (sh_frame, loop_frame, priv);
call_count = sh_number_of_writes_needed (loop_sh->write_needed,
priv->child_count);
- GF_ASSERT (call_count > 0);
+ if (call_count == 0) {
+ sh_loop_return (sh_frame, this, loop_frame, 0, 0);
+ goto out;
+ }
+
loop_local->call_count = call_count;
+ /*
+ * We only really need the request size at the moment, but the buffer
+ * is required if we want to issue a retry in the event of a short write.
+ * Therefore, we duplicate the vector and ref the iobref here...
+ */
+ loop_local->cont.writev.vector = iov_dup(vector, count);
+ loop_local->cont.writev.iobref = iobref_ref(iobref);
+
for (i = 0; i < priv->child_count; i++) {
if (!loop_sh->write_needed[i])
continue;
@@ -519,7 +557,7 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
priv->children[i],
priv->children[i]->fops->writev,
loop_sh->healing_fd, vector, count,
- loop_sh->offset, iobref);
+ loop_sh->offset, 0, iobref, NULL);
if (!--call_count)
break;
@@ -546,7 +584,7 @@ sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
priv->children[loop_sh->source],
priv->children[loop_sh->source]->fops->readv,
loop_sh->healing_fd, loop_sh->block_size,
- loop_sh->offset);
+ loop_sh->offset, 0, NULL);
return 0;
}
@@ -555,7 +593,8 @@ sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
static int
sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *loop_local = NULL;
@@ -587,10 +626,10 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
"checksum on %s failed on subvolume %s (%s)",
sh_local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
- memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LEN,
- strong_checksum, MD5_DIGEST_LEN);
+ memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
+ strong_checksum, MD5_DIGEST_LENGTH);
}
call_count = afr_frame_return (loop_frame);
@@ -600,9 +639,9 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
if (sh->sources[i] || !sh_local->child_up[i])
continue;
- if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LEN),
- loop_sh->checksum + (sh->source * MD5_DIGEST_LEN),
- MD5_DIGEST_LEN)) {
+ if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LENGTH),
+ loop_sh->checksum + (sh->source * MD5_DIGEST_LENGTH),
+ MD5_DIGEST_LENGTH)) {
/*
Checksums differ, so this block
must be written to this sink
@@ -625,7 +664,8 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
}
UNLOCK (&sh_priv->lock);
- if (write_needed && !sh->op_failed) {
+ if (write_needed &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh_loop_read (loop_frame, this);
} else {
sh_loop_return (sh_frame, this, loop_frame,
@@ -658,7 +698,7 @@ sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
priv->children[loop_sh->source],
priv->children[loop_sh->source]->fops->rchecksum,
loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size);
+ loop_sh->offset, loop_sh->block_size, NULL);
for (i = 0; i < priv->child_count; i++) {
if (loop_sh->sources[i] || !loop_local->child_up[i])
@@ -669,7 +709,7 @@ sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->rchecksum,
loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size);
+ loop_sh->offset, loop_sh->block_size, NULL);
if (!--call_count)
break;
@@ -714,14 +754,15 @@ out:
return sh_priv;
}
-void
-afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src,
+int
+afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src, char *dom,
unsigned int child_count)
{
afr_local_t *dst_local = NULL;
afr_self_heal_t *dst_sh = NULL;
afr_local_t *src_local = NULL;
afr_self_heal_t *src_sh = NULL;
+ int ret = -1;
dst_local = dst->local;
dst_sh = &dst_local->self_heal;
@@ -729,9 +770,12 @@ afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src,
src_sh = &src_local->self_heal;
GF_ASSERT (src_sh->data_lock_held);
GF_ASSERT (!dst_sh->data_lock_held);
- afr_lk_transfer_datalock (dst, src, child_count);
+ ret = afr_lk_transfer_datalock (dst, src, dom, child_count);
+ if (ret)
+ return ret;
src_sh->data_lock_held = _gf_false;
dst_sh->data_lock_held = _gf_true;
+ return 0;
}
int
@@ -753,7 +797,10 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
if (ret)
goto out;
- afr_sh_transfer_lock (first_loop_frame, sh_frame, priv->child_count);
+ ret = afr_sh_transfer_lock (first_loop_frame, sh_frame, this->name,
+ priv->child_count);
+ if (ret)
+ goto out;
sh->private = afr_sh_priv_init ();
if (!sh->private) {
ret = -1;
@@ -763,7 +810,7 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
ret = 0;
out:
if (ret) {
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
sh_loop_driver_done (sh_frame, this, NULL);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
index 04d8e8a6c..6b20789b1 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
@@ -1,26 +1,16 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
#define __AFR_SELF_HEAL_ALGORITHM_H__
-
typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
xlator_t *this);
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index ad84f8541..ef92b4205 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "glusterfs.h"
@@ -27,6 +18,52 @@
#include "afr-self-heal.h"
#include "pump.h"
+#define ADD_FMT_STRING(msg, off, sh_str, status, print_log) \
+ do { \
+ if (AFR_SELF_HEAL_NOT_ATTEMPTED != status) { \
+ off += snprintf (msg + off, sizeof (msg) - off, \
+ " "sh_str" self heal %s,", \
+ get_sh_completion_status (status));\
+ print_log = 1; \
+ } \
+ } while (0)
+
+#define ADD_FMT_STRING_SYNC(msg, off, sh_str, status, print_log) \
+ do { \
+ if (AFR_SELF_HEAL_SYNC_BEGIN == status || \
+ AFR_SELF_HEAL_FAILED == status) { \
+ off += snprintf (msg + off, sizeof (msg) - off, \
+ " "sh_str" self heal %s,", \
+ get_sh_completion_status (status));\
+ print_log = 1; \
+ } \
+ } while (0)
+
+
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
+ memset (sh->parentbufs, 0,
+ sizeof (*sh->parentbufs) * priv->child_count);
+ memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
+ memset (sh->locked_nodes, 0,
+ sizeof (*sh->locked_nodes) * priv->child_count);
+ sh->active_sinks = 0;
+
+ afr_reset_xattr (sh->xattr, priv->child_count);
+}
+
//Intersection[child]=1 if child is part of intersection
void
afr_children_intersection_get (int32_t *set1, int32_t *set2,
@@ -97,8 +134,8 @@ void
afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
{
sh->op_ret = -1;
- if (afr_error_more_important (sh->op_errno, op_errno))
- sh->op_errno = op_errno;
+ sh->op_errno = afr_most_important_error(sh->op_errno, op_errno,
+ _gf_false);
}
void
@@ -120,13 +157,85 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
}
sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_DEBUG,
- "pending_matrix: %s", buf);
+ gf_log (this->name, GF_LOG_DEBUG, "pending_matrix: %s", buf);
}
GF_FREE (buf);
}
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *ptr = NULL;
+ int i = 0;
+ int j = 0;
+ int child_count = priv->child_count;
+ char *matrix_begin = "[ [ ";
+ char *matrix_end = "] ]";
+ char *seperator = "] [ ";
+ int pending_entry_strlen = 12; //Including space after entry
+ int matrix_begin_strlen = 0;
+ int matrix_end_strlen = 0;
+ int seperator_strlen = 0;
+ int string_length = 0;
+ char *msg = "- Pending matrix: ";
+
+ /*
+ * for a list of lists of [ [ a b ] [ c d ] ]
+ * */
+
+ matrix_begin_strlen = strlen (matrix_begin);
+ matrix_end_strlen = strlen (matrix_end);
+ seperator_strlen = strlen (seperator);
+ string_length = matrix_begin_strlen + matrix_end_strlen
+ + (child_count -1) * seperator_strlen
+ + (child_count * child_count * pending_entry_strlen);
+
+ buf = GF_CALLOC (1, 1 + strlen (msg) + string_length , gf_afr_mt_char);
+ if (!buf)
+ goto out;
+
+ ptr = buf;
+ ptr += sprintf (ptr, "%s", msg);
+ ptr += sprintf (ptr, "%s", matrix_begin);
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
+ }
+ if (i < priv->child_count -1)
+ ptr += sprintf (ptr, "%s", seperator);
+ }
+
+ ptr += sprintf (ptr, "%s", matrix_end);
+
+out:
+ return buf;
+}
+
+void
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc)
+{
+ char *buf = NULL;
+ char *free_ptr = NULL;
+
+ buf = afr_get_pending_matrix_str (pending_matrix, this);
+ if (buf)
+ free_ptr = buf;
+ else
+ buf = "";
+
+
+ gf_log (this->name, GF_LOG_ERROR, "Unable to self-heal contents of '%s'"
+ " (possible split-brain). Please delete the file from all but "
+ "the preferred subvolume.%s", loc, buf);
+ GF_FREE (free_ptr);
+ return;
+}
+
+
void
afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
{
@@ -392,6 +501,8 @@ afr_find_biggest_witness_among_fools (int32_t *witnesses,
{
int i = 0;
int biggest_witness = -1;
+ int biggest_witness_idx = -1;
+ int biggest_witness_cnt = -1;
GF_ASSERT (witnesses);
GF_ASSERT (characters);
@@ -401,10 +512,21 @@ afr_find_biggest_witness_among_fools (int32_t *witnesses,
if (characters[i].type != AFR_NODE_FOOL)
continue;
- if (biggest_witness < witnesses[i])
+ if (biggest_witness < witnesses[i]) {
biggest_witness = witnesses[i];
+ biggest_witness_idx = i;
+ biggest_witness_cnt = 1;
+ continue;
+ }
+
+ if (biggest_witness == witnesses[i])
+ biggest_witness_cnt++;
}
- return biggest_witness;
+
+ if (biggest_witness_cnt != 1)
+ return -1;
+
+ return biggest_witness_idx;
}
int
@@ -432,10 +554,84 @@ afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
return nsources;
}
+
+int
+afr_mark_fool_as_source_by_idx (int32_t *sources, int child_count, int idx)
+{
+ if (idx >= 0 && idx < child_count) {
+ sources[idx] = 1;
+ return 1;
+ }
+ return 0;
+}
+
+
+static int
+afr_find_largest_file_size (struct iatt *bufs, int32_t *success_children,
+ int child_count)
+{
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_size = 0;
+ uint64_t min_size = 0;
+ int num_children = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_size > max_size) {
+ max_size = bufs[child].ia_size;
+ idx = child;
+ }
+
+ if ((num_children == 0) || (bufs[child].ia_size < min_size)) {
+ min_size = bufs[child].ia_size;
+ }
+
+ num_children++;
+ }
+
+ /* If sizes are same for all of them, finding sources will have to
+ * happen with pending changelog. So return -1
+ */
+ if ((num_children > 1) && (min_size == max_size))
+ return -1;
+ return idx;
+}
+
+
+static int
+afr_find_newest_file (struct iatt *bufs, int32_t *success_children,
+ int child_count)
+{
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_ctime = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_ctime > max_ctime) {
+ max_ctime = bufs[child].ia_ctime;
+ idx = child;
+ }
+ }
+
+ return idx;
+}
+
+
static int
afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
afr_node_character *characters,
- int child_count)
+ int32_t *success_children,
+ int child_count, struct iatt *bufs)
{
int32_t biggest_witness = 0;
int nsources = 0;
@@ -443,6 +639,11 @@ afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
GF_ASSERT (child_count > 0);
+ biggest_witness = afr_find_largest_file_size (bufs, success_children,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
gf_afr_mt_int32_t);
if (NULL == witnesses) {
@@ -455,12 +656,17 @@ afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
characters,
child_count);
- nsources = afr_mark_fool_as_source_by_witness (sources, witnesses,
- characters, child_count,
- biggest_witness);
+ if (biggest_witness != -1)
+ goto found;
+
+ biggest_witness = afr_find_newest_file (bufs, success_children,
+ child_count);
+
+found:
+ nsources = afr_mark_fool_as_source_by_idx (sources, child_count,
+ biggest_witness);
out:
- if (witnesses)
- GF_FREE (witnesses);
+ GF_FREE (witnesses);
return nsources;
}
@@ -526,6 +732,78 @@ out:
return nsources;
}
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+ struct iatt *bufs)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int child = -1;
+ int read_child = -1;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (read_child < 0)
+ read_child = child;
+ else if (bufs[read_child].ia_size < bufs[child].ia_size)
+ read_child = child;
+ }
+ return read_child;
+}
+
+int
+afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children,
+ int child_count, int32_t *sources)
+{
+ int nsources = 0;
+ int i = 0;
+ int child = 0;
+ gf_boolean_t sink_exists = _gf_false;
+ gf_boolean_t source_exists = _gf_false;
+ int source = -1;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (!bufs[child].ia_size) {
+ sink_exists = _gf_true;
+ continue;
+ }
+ if (!source_exists) {
+ source_exists = _gf_true;
+ source = child;
+ continue;
+ }
+ if (bufs[source].ia_size != bufs[child].ia_size) {
+ nsources = -1;
+ goto out;
+ }
+ }
+ if (!source_exists && !sink_exists) {
+ nsources = -1;
+ goto out;
+ }
+
+ if (!source_exists || !sink_exists)
+ goto out;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (bufs[child].ia_size) {
+ sources[child] = 1;
+ nsources++;
+ }
+ }
+out:
+ return nsources;
+}
+
char *
afr_get_character_str (afr_node_type type)
{
@@ -691,11 +969,24 @@ afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
afr_find_character_types (characters, pending_matrix, success_children,
child_count);
if (afr_sh_all_nodes_innocent (characters, child_count)) {
- if (type == AFR_SELF_HEAL_METADATA)
+ switch (type) {
+ case AFR_SELF_HEAL_METADATA:
nsources = afr_sh_mark_lowest_uid_as_source (bufs,
success_children,
child_count,
sources);
+ break;
+ case AFR_SELF_HEAL_DATA:
+ nsources = afr_sh_mark_zero_size_file_as_sink (bufs,
+ success_children,
+ child_count,
+ sources);
+ if ((nsources < 0) && subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
+ break;
+ default:
+ break;
+ }
goto out;
}
@@ -717,7 +1008,8 @@ afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
nsources = afr_mark_biggest_of_fools_as_source (sources,
pending_matrix,
characters,
- child_count);
+ success_children,
+ child_count, bufs);
}
out:
@@ -735,50 +1027,108 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
+ int tgt = 0;
+ int src = 0;
+ int value = 0;
afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
xattr, type, priv->child_count);
- for (i = 0; i < priv->child_count; i++)
- for (j = 0; j < priv->child_count; j++)
- delta_matrix[i][j] = -delta_matrix[i][j];
+
+ /*
+ * The algorithm here has two parts. First, for each subvol indexed
+ * as tgt, we try to figure out what count everyone should have for it.
+ * If the self-heal succeeded, that's easy; the value is zero.
+ * Otherwise, the value is the maximum of the succeeding nodes' counts.
+ * Once we know the value, we loop through (possibly for a second time)
+ * setting each count to the difference so that when we're done all
+ * succeeding nodes will have the same count for tgt.
+ */
+ for (tgt = 0; tgt < priv->child_count; ++tgt) {
+ value = 0;
+ if (!success[tgt]) {
+ /* Find the maximum. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (!success[src]) {
+ continue;
+ }
+ if (delta_matrix[src][tgt] > value) {
+ value = delta_matrix[src][tgt];
+ }
+ }
+ }
+ /* Force everyone who succeeded to the chosen value. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (success[src]) {
+ delta_matrix[src][tgt] = value
+ - delta_matrix[src][tgt];
+ }
+ else {
+ delta_matrix[src][tgt] = 0;
+ }
+ }
+ }
}
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
- int k = 0;
- int ret = 0;
- int32_t *pending = NULL;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int ret = 0;
+ int32_t *pending = NULL;
+ int32_t *local_pending = NULL;
+ afr_private_t *priv = NULL;
+ priv = this->private;
for (i = 0; i < child_count; i++) {
if (!xattr[i])
continue;
+ local_pending = NULL;
for (j = 0; j < child_count; j++) {
pending = GF_CALLOC (sizeof (int32_t), 3,
gf_afr_mt_int32_t);
- if (!pending)
+ if (!pending) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate pending entry "
+ "for %s[%d] on %s",
+ priv->pending_key[j], type,
+ priv->children[i]->name);
continue;
+ }
/* 3 = data+metadata+entry */
k = afr_index_for_transaction_type (type);
pending[k] = hton32 (delta_matrix[i][j]);
+ if (j == i) {
+ local_pending = pending;
+ continue;
+ }
ret = dict_set_bin (xattr[i], priv->pending_key[j],
pending,
- 3 * sizeof (int32_t));
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_WARNING,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
"Unable to set dict value.");
+ GF_FREE (pending);
+ }
+ }
+ if (local_pending) {
+ ret = dict_set_bin (xattr[i], priv->pending_key[i],
+ local_pending,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value.");
+ GF_FREE (local_pending);
+ }
}
}
return 0;
@@ -786,127 +1136,23 @@ afr_sh_delta_to_xattr (afr_private_t *priv,
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
-
- if (pending[j])
- return 1;
- }
-
- return 0;
-}
-
-
-int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
-
- if (pending[j])
- return 1;
- }
-
- return 0;
-}
-
-
-int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
-
- if (pending[j])
- return 1;
- }
-
- return 0;
-}
-
-int
afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sh->locked_nodes[i] = 0;
- }
+ afr_sh_reset (frame, this);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda || sh->op_failed) {
- gf_log (this->name, GF_LOG_INFO,
+ if (local->unhealable) {
+ gf_log (this->name, GF_LOG_DEBUG,
"split brain found, aborting selfheal of %s",
local->loc.path);
- sh->op_failed = 1;
+ }
+
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh->completion_cbk (frame, this);
} else {
gf_log (this->name, GF_LOG_TRACE,
@@ -992,7 +1238,7 @@ afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
sh->success_count++;
sh->xattr[child_index] = dict_ref (xattr);
} else {
- gf_log (this->name, GF_LOG_ERROR, "path %s on subvolume"
+ gf_log (this->name, GF_LOG_DEBUG, "path %s on subvolume"
" %s => -1 (%s)", loc->path,
priv->children[child_index]->name,
strerror (op_errno));
@@ -1023,8 +1269,7 @@ afr_valid_ia_type (ia_type_t ia_type)
int
afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, int ret_child, mode_t entry_mode,
- call_frame_t **impunge_frame)
+ int active_source, call_frame_t **impunge_frame)
{
afr_local_t *local = NULL;
afr_local_t *impunge_local = NULL;
@@ -1041,21 +1286,24 @@ afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out);
local = frame->local;
new_frame->local = impunge_local;
impunge_sh = &impunge_local->self_heal;
impunge_sh->sh_frame = frame;
impunge_sh->active_source = active_source;
- impunge_sh->impunge_ret_child = ret_child;
- impunge_sh->impunging_entry_mode = entry_mode;
impunge_local->child_up = memdup (local->child_up,
sizeof (*local->child_up) *
priv->child_count);
if (!impunge_local->child_up)
goto out;
+ impunge_local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!impunge_local->pending)
+ goto out;
+
ret = afr_sh_common_create (impunge_sh, priv->child_count);
if (ret) {
op_errno = -ret;
@@ -1070,54 +1318,89 @@ out:
}
void
-afr_sh_call_entry_impunge_recreate (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent,
- afr_impunge_done_cbk_t impunge_done)
+afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this,
+ struct iatt *buf,
+ struct iatt *postparent,
+ afr_impunge_done_cbk_t impunge_done)
{
call_frame_t *impunge_frame = NULL;
afr_local_t *local = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
int ret = 0;
- mode_t mode = 0;
+ unsigned int enoent_count = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
local = frame->local;
sh = &local->self_heal;
- mode = st_mode_from_ia (buf->ia_prot, buf->ia_type);
- ret = afr_impunge_frame_create (frame, this, sh->source, child_index,
- mode, &impunge_frame);
+ priv = this->private;
+
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (!enoent_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no missing files - %s. proceeding to metadata check",
+ local->loc.path);
+ goto out;
+ }
+ sh->impunge_done = impunge_done;
+ ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame);
if (ret)
goto out;
impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
loc_copy (&impunge_local->loc, &local->loc);
- sh->impunge_done = impunge_done;
- impunge_local->call_count = 1;
- afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf,
- postparent);
+ ret = afr_build_parent_loc (&impunge_sh->parent_loc,
+ &impunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
+ impunge_local->call_count = enoent_count;
+ impunge_sh->entrybuf = sh->buf[sh->source];
+ impunge_sh->parentbuf = sh->parentbufs[sh->source];
+ for (i = 0; i < priv->child_count; i++) {
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (sh->child_errno[i] != ENOENT) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->child_errno[i] != ENOENT)
+ continue;
+ afr_sh_entry_impunge_create (impunge_frame, this, i);
+ enoent_count--;
+ }
+ GF_ASSERT (!enoent_count);
return;
out:
- gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, reason: %s",
- local->loc.path, strerror (-ret));
- impunge_done (frame, this, child_index, -1, -ret);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
+ "reason: %s", local->loc.path, strerror (-ret));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ afr_sh_missing_entries_finish (frame, this);
}
int
-afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
+afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
int32_t op_ret, int32_t op_errno)
{
- int call_count = 0;
afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "create entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_missing_entries_finish (frame, this);
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
return 0;
}
@@ -1127,26 +1410,11 @@ sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
int type = 0;
- afr_private_t *priv = NULL;
- int enoent_count = 0;
- int i = 0;
struct iatt *buf = NULL;
struct iatt *postparent = NULL;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- afr_sh_missing_entries_finish (frame, this);
- return 0;
- }
buf = &sh->buf[sh->source];
postparent = &sh->parentbufs[sh->source];
@@ -1155,22 +1423,14 @@ sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
if (!afr_valid_ia_type (type)) {
gf_log (this->name, GF_LOG_ERROR,
"%s: unknown file type: 0%o", local->loc.path, type);
- local->govinda_gOvinda = 1;
+ afr_set_local_for_unhealable (local);
afr_sh_missing_entries_finish (frame, this);
goto out;
}
- local->call_count = enoent_count;
- for (i = 0; i < priv->child_count; i++) {
- //If !child_up errno will be zero
- if (sh->child_errno[i] != ENOENT)
- continue;
- afr_sh_call_entry_impunge_recreate (frame, this, i,
+ afr_sh_missing_entry_call_impunge_recreate (frame, this,
buf, postparent,
afr_sh_create_entry_cbk);
- enoent_count--;
- }
- GF_ASSERT (enoent_count == 0);
out:
return 0;
}
@@ -1185,6 +1445,10 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
ia_type_t ia_type = IA_INVAL;
int32_t nsources = 0;
loc_t *loc = NULL;
+ int32_t subvol_status = 0;
+ afr_transaction_type txn_type = AFR_DATA_TRANSACTION;
+ gf_boolean_t split_brain = _gf_false;
+ int read_child = -1;
local = frame->local;
sh = &local->self_heal;
@@ -1192,25 +1456,47 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
loc = &local->loc;
if (op_ret < 0) {
- if (op_errno == EIO)
- local->govinda_gOvinda = 1;
+ if (op_errno == EIO) {
+ afr_set_local_for_unhealable (local);
+ }
// EIO can happen if finding the fresh parent dir failed
goto out;
}
//now No chance for the ia_type to conflict
ia_type = sh->buf[sh->success_children[0]].ia_type;
+ txn_type = afr_transaction_type_get (ia_type);
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
- sh->success_children,
- afr_transaction_type_get (ia_type),
- NULL, _gf_false);
+ sh->success_children, txn_type,
+ &subvol_status, _gf_false);
if (nsources < 0) {
gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
" in missing entry self-heal, continuing with the rest"
" of the self-heals", local->loc.path);
- op_errno = EIO;
- goto out;
+ if (subvol_status & SPLIT_BRAIN) {
+ split_brain = _gf_true;
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ nsources = 1;
+ sh->sources[sh->success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child
+ (this,
+ sh->success_children,
+ sh->buf);
+ sh->sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ op_errno = EIO;
+ goto out;
+ }
+ } else {
+ op_errno = EIO;
+ goto out;
+ }
}
afr_get_fresh_children (sh->success_children, sh->sources,
@@ -1227,10 +1513,14 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
sh->type = sh->buf[sh->source].ia_type;
if (uuid_is_null (loc->inode->gfid))
uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
- sh_missing_entries_create (frame, this);
+ if (split_brain) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ sh_missing_entries_create (frame, this);
+ }
return;
out:
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@@ -1314,7 +1604,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
LOCK (&frame->lock);
{
afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
}
@@ -1336,13 +1626,14 @@ afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
afr_self_heal_t *sh = NULL;
afr_self_heal_t *expunge_sh = NULL;
int32_t op_errno = 0;
+ int ret = 0;
expunge_frame = copy_frame (frame);
if (!expunge_frame) {
goto out;
}
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
local = frame->local;
sh = &local->self_heal;
@@ -1350,6 +1641,12 @@ afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
expunge_sh = &expunge_local->self_heal;
expunge_sh->sh_frame = frame;
loc_copy (&expunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&expunge_sh->parent_loc,
+ &expunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
sh->expunge_done = expunge_done;
afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf,
parentbuf);
@@ -1389,7 +1686,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_missing_entries_finish (frame, this);
} else {
if (afr_gfid_missing_count (this->name, sh->fresh_children,
@@ -1399,7 +1696,8 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
afr_sh_missing_entries_lookup_done,
sh->sh_gfid_req,
AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
} else {
//No need to set gfid so goto missing entries lookup done
//Behave as if you have done the lookup
@@ -1481,7 +1779,7 @@ afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
if (!purge_condition (local, priv, i))
continue;
gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
- "on %d", local->loc.path, i);
+ "on %s", local->loc.path, priv->children[i]->name);
afr_sh_call_entry_expunge_remove (frame, this,
(long) i, &sh->buf[i],
&sh->parentbufs[i],
@@ -1601,10 +1899,8 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
sh->child_errno,
priv->child_count, ENOENT);
if (fresh_child_enoents == fresh_parent_count) {
- gf_log (this->name, GF_LOG_INFO, "Deleting stale file %s",
- local->loc.path);
afr_sh_set_error (sh, ENOENT);
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_purge_entry (frame, this);
} else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
priv->child_count, local->loc.path,
@@ -1618,14 +1914,14 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
afr_sh_purge_stale_entry (frame, this);
} else {
op_errno = EIO;
- local->govinda_gOvinda = 1;
+ afr_set_local_for_unhealable (local);
goto fail;
}
return;
fail:
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@@ -1647,25 +1943,17 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
sh = &local->self_heal;
priv = this->private;
- /* If We can't find a fresh parent directory here,
- * we wont know which subvol is correct without finding a parent dir
- * upwards which has correct xattrs, for that we may have to
- * do lookups till root, we dont wanna do that,
- * instead make sure that if there are conflicting gfid
- * parent dirs, self-heal thus lookup is failed with EIO.
- * if there are missing entries we dont know whether to delete or
- * create so fail with EIO,
- * If there are conflicting xattr fail with EIO.
- */
if (op_ret < 0)
goto out;
enoent_count = afr_errno_count (NULL, sh->child_errno,
priv->child_count, ENOENT);
if (enoent_count > 0) {
- gf_log (this->name, GF_LOG_ERROR, "Parent dir missing for %s,"
- " in missing entry self-heal, aborting self-heal",
+ gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s,"
+ " in missing entry self-heal, aborting missing-entry "
+ "self-heal",
local->loc.path);
- goto out;
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
nsources = afr_build_sources (this, sh->xattr, sh->buf,
@@ -1684,6 +1972,7 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
"of %s, in missing entry self-heal, aborting "
"self-heal", local->loc.path);
+ op_errno = EIO;
goto out;
}
@@ -1691,18 +1980,20 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
if (source == -1) {
GF_ASSERT (0);
gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
goto out;
}
afr_get_fresh_children (sh->success_children, sh->sources,
sh->fresh_parent_dirs, priv->child_count);
afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_children_lookup_done, NULL, 0);
+ afr_sh_children_lookup_done, NULL, 0,
+ NULL);
return;
out:
- afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
- afr_sh_missing_entries_finish (frame, this);
+ afr_sh_set_error (sh, op_errno);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
return;
}
@@ -1730,7 +2021,7 @@ afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
int
afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
afr_local_t *local = NULL;
int i = 0;
@@ -1791,7 +2082,8 @@ afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
int
-afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
+afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
+ xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -1804,7 +2096,7 @@ afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_INFO,
"Non blocking entrylks failed.");
- sh->op_failed = -1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_done (frame, this);
} else {
@@ -1812,34 +2104,8 @@ afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
"Non blocking entrylks done. Proceeding to FOP");
afr_sh_common_lookup (frame, this, &sh->parent_loc,
afr_sh_find_fresh_parents,
- NULL, AFR_LOOKUP_FAIL_CONFLICTS);
- }
-
- return 0;
-}
-
-int
-afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- afr_sh_missing_entries_done (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req, AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
+ NULL, AFR_LOOKUP_FAIL_CONFLICTS,
+ NULL);
}
return 0;
@@ -1851,7 +2117,9 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
@@ -1863,7 +2131,12 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
int_lock->lk_basename = base_name;
int_lock->lk_loc = loc;
int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = this->name;
+ int_lock->lockee_count = 0;
+ afr_init_entry_lockee (&int_lock->lockee[0], local, loc,
+ base_name, priv->child_count);
+ int_lock->lockee_count++;
afr_nonblocking_entrylk (frame, this);
return 0;
@@ -1875,6 +2148,9 @@ afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
local = frame->local;
sh = &local->self_heal;
@@ -1883,43 +2159,52 @@ afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
"attempting to recreate missing entries for path=%s",
local->loc.path);
- GF_ASSERT (local->loc.parent);
- afr_build_parent_loc (&sh->parent_loc, &local->loc);
+ ret = afr_build_parent_loc (&sh->parent_loc, &local->loc, &op_errno);
+ if (ret)
+ goto out;
afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
lock_cbk);
return 0;
-}
-
-static int
-afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_conflicting_sh_cbk);
+out:
+ int_lock = &local->internal_lock;
+ int_lock->lock_op_ret = -1;
+ lock_cbk (frame, this);
return 0;
}
static int
-afr_self_heal_gfids (call_frame_t *frame, xlator_t *this)
+afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+
afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_gfid_sh_cbk);
+ afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
return 0;
}
-afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+afr_local_t*
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *lc = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *shc = NULL;
+ int ret = 0;
priv = this->private;
sh = &l->self_heal;
- lc = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
+ lc = mem_get0 (this->local_pool);
if (!lc)
goto out;
@@ -1932,16 +2217,27 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
shc->do_data_self_heal = sh->do_data_self_heal;
shc->do_metadata_self_heal = sh->do_metadata_self_heal;
shc->do_entry_self_heal = sh->do_entry_self_heal;
+ shc->force_confirm_spb = sh->force_confirm_spb;
shc->forced_merge = sh->forced_merge;
shc->background = sh->background;
shc->type = sh->type;
+ shc->data_sh_info = "";
+ shc->metadata_sh_info = "";
uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
+ if (l->loc.path) {
+ ret = loc_copy (&lc->loc, &l->loc);
+ if (ret < 0)
+ goto out;
+ }
lc->child_up = memdup (l->child_up,
sizeof (*lc->child_up) * priv->child_count);
+ if (!lc->child_up) {
+ ret = -1;
+ goto out;
+ }
+
if (l->xattr_req)
lc->xattr_req = dict_ref (l->xattr_req);
@@ -1949,40 +2245,25 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
if (l->cont.lookup.xattr)
lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (l->internal_lock.inode_locked_nodes)
- lc->internal_lock.inode_locked_nodes =
- memdup (l->internal_lock.inode_locked_nodes,
- sizeof (*lc->internal_lock.inode_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.inode_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.entry_locked_nodes)
- lc->internal_lock.entry_locked_nodes =
- memdup (l->internal_lock.entry_locked_nodes,
- sizeof (*lc->internal_lock.entry_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.entry_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.locked_nodes)
- lc->internal_lock.locked_nodes =
- memdup (l->internal_lock.locked_nodes,
- sizeof (*lc->internal_lock.locked_nodes) * priv->child_count);
- else
- lc->internal_lock.locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- lc->internal_lock.inodelk_lock_count =
- l->internal_lock.inodelk_lock_count;
- lc->internal_lock.entrylk_lock_count =
- l->internal_lock.entrylk_lock_count;
+ lc->internal_lock.locked_nodes =
+ GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
+ priv->child_count, gf_afr_mt_char);
+ if (!lc->internal_lock.locked_nodes) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = afr_inodelk_init (&lc->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret)
+ goto out;
out:
+ if (ret) {
+ afr_local_cleanup (lc, this);
+ lc = NULL;
+ }
return lc;
}
@@ -1992,32 +2273,39 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
afr_self_heal_t * sh = NULL;
+ afr_local_t * orig_frame_local = NULL;
+ afr_self_heal_t * orig_frame_sh = NULL;
char sh_type_str[256] = {0,};
- gf_boolean_t split_brain = _gf_false;
+ gf_loglevel_t loglevel = 0;
priv = this->private;
local = bgsh_frame->local;
sh = &local->self_heal;
- if (local->govinda_gOvinda)
- split_brain = _gf_true;
-
- afr_set_split_brain (this, sh->inode, split_brain);
+ if (local->unhealable) {
+ afr_set_split_brain (this, sh->inode, SPB, SPB);
+ }
afr_self_heal_type_str_get (sh, sh_type_str,
sizeof(sh_type_str));
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_ERROR, "background %s self-heal "
- "failed on %s", sh_type_str, local->loc.path);
+ if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
+ loglevel = GF_LOG_ERROR;
+ } else if (!is_self_heal_failed (sh, AFR_CHECK_ALL)) {
+ loglevel = GF_LOG_INFO;
} else {
- gf_log (this->name, GF_LOG_INFO, "background %s self-heal "
- "completed on %s", sh_type_str, local->loc.path);
+ loglevel = GF_LOG_DEBUG;
}
+ afr_log_self_heal_completion_status (local, loglevel);
+
FRAME_SU_UNDO (bgsh_frame, afr_local_t);
if (!sh->unwound && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
+ orig_frame_local = sh->orig_frame->local;
+ orig_frame_sh = &orig_frame_local->self_heal;
+ orig_frame_sh->actual_sh_started = _gf_true;
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_ALL));
}
if (sh->background) {
@@ -2039,7 +2327,6 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int i = 0;
int32_t op_errno = 0;
int ret = 0;
afr_self_heal_t *orig_sh = NULL;
@@ -2060,21 +2347,20 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
local->self_heal.do_data_self_heal,
local->self_heal.do_entry_self_heal);
- op_errno = ENOMEM;
+ op_errno = ENOMEM;
sh_frame = copy_frame (frame);
if (!sh_frame)
goto out;
- afr_set_lk_owner (sh_frame, this);
+ afr_set_lk_owner (sh_frame, this, sh_frame->root);
afr_set_low_priority (sh_frame);
- sh_local = afr_local_copy (local, this);
+ sh_local = afr_self_heal_local_init (local, this);
if (!sh_local)
goto out;
sh_frame->local = sh_local;
sh = &sh_local->self_heal;
sh->inode = inode_ref (inode);
-
sh->orig_frame = frame;
sh->completion_cbk = afr_self_heal_completion_cbk;
@@ -2093,30 +2379,16 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
if (!sh->locked_nodes)
goto out;
- sh->pending_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
+ sh->pending_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
if (!sh->pending_matrix)
goto out;
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->pending_matrix[i])
- goto out;
- }
-
- sh->delta_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
+ sh->delta_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
if (!sh->delta_matrix)
goto out;
- for (i = 0; i < priv->child_count; i++) {
- sh->delta_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->delta_matrix)
- goto out;
- }
+
sh->fresh_parent_dirs = afr_children_create (priv->child_count);
if (!sh->fresh_parent_dirs)
goto out;
@@ -2142,12 +2414,16 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
UNLOCK (&priv->lock);
}
+ if (!local->loc.parent) {
+ sh->do_missing_entry_self_heal = _gf_false;
+ sh->do_gfid_self_heal = _gf_false;
+ }
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
+
FRAME_SU_DO (sh_frame, afr_local_t);
- if (sh->do_missing_entry_self_heal) {
- afr_self_heal_conflicting_entries (sh_frame, this);
- } else if (sh->do_gfid_self_heal) {
- GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
- afr_self_heal_gfids (sh_frame, this);
+ if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
+ afr_self_heal_missing_entries (sh_frame, this);
} else {
loc = &sh_local->loc;
if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) {
@@ -2166,7 +2442,9 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
out:
if (op_errno) {
- orig_sh->unwind (frame, this, -1, op_errno);
+ orig_sh->unwind (frame, this, -1, op_errno, 1);
+ if (sh_frame)
+ AFR_STACK_DESTROY (sh_frame);
}
return 0;
}
@@ -2223,13 +2501,21 @@ afr_self_heal_type_for_transaction (afr_transaction_type type)
}
int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name, uuid_t gfid)
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
{
int ret = -1;
+ uuid_t pargfid = {0};
- if (!child) {
+ if (!child)
+ goto out;
+
+ if (!uuid_is_null (parent->inode->gfid))
+ uuid_copy (pargfid, parent->inode->gfid);
+ else if (!uuid_is_null (parent->gfid))
+ uuid_copy (pargfid, parent->gfid);
+
+ if (uuid_is_null (pargfid))
goto out;
- }
if (strcmp (parent->path, "/") == 0)
ret = gf_asprintf ((char **)&child->path, "/%s", name);
@@ -2242,27 +2528,285 @@ afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name, uu
"asprintf failed while setting child path");
}
- if (!child->path) {
- goto out;
- }
-
child->name = strrchr (child->path, '/');
if (child->name)
child->name++;
child->parent = inode_ref (parent->inode);
child->inode = inode_new (parent->inode->table);
+ uuid_copy (child->pargfid, pargfid);
if (!child->inode) {
ret = -1;
goto out;
}
- uuid_copy (child->gfid, gfid);
ret = 0;
out:
- if (ret == -1)
+ if ((ret == -1) && child)
loc_wipe (child);
return ret;
}
+
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this))
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ dict_t **erase_xattr = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
+ sh->success, priv->child_count, type);
+
+ erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!erase_xattr)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i]) {
+ call_count++;
+ erase_xattr[i] = dict_new ();
+ if (!erase_xattr[i])
+ goto out;
+ }
+ }
+
+ afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr,
+ priv->child_count, type);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ afr_sh_print_pending_matrix (sh->delta_matrix, this);
+ local->call_count = call_count;
+ if (call_count == 0) {
+ ret = 0;
+ finish (frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!erase_xattr[i])
+ continue;
+
+ if (sh->healing_fd) {//true for ENTRY, reg file DATA transaction
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ sh->healing_fd,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ }
+ }
+
+ ret = 0;
+out:
+ if (erase_xattr) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (erase_xattr[i]) {
+ dict_unref (erase_xattr[i]);
+ }
+ }
+ }
+
+ GF_FREE (erase_xattr);
+
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ finish (frame, this);
+ }
+
+ return 0;
+}
+
+void
+afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
+{
+ xlator_t *this = NULL;
+ afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status);
+ afr_self_heal_type sh_type_in_action = sh->sh_type_in_action;
+ this = THIS;
+
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal"
+ "Structure");
+ goto out;
+ }
+
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ sh_status->gfid_or_missing_entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ sh_status->metadata_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ sh_status->data_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ sh_status->entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid"
+ "self heal type in action");
+ break;
+ }
+out:
+ return;
+}
+
+void
+afr_set_local_for_unhealable (afr_local_t *local)
+{
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ local->unhealable = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+}
+
+int
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
+{
+ afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID;
+ afr_self_heal_status status = AFR_SELF_HEAL_FAILED;
+ xlator_t *this = NULL;
+ int sh_failed = 0;
+
+ this = THIS;
+
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+ "structure");
+ sh_failed = 1;
+ goto out;
+ }
+
+ if (type == AFR_CHECK_ALL) {
+ if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
+ sh_failed = 1;
+ } else if (type == AFR_CHECK_SPECIFIC) {
+ sh_type_in_action = sh->sh_type_in_action;
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ status = sh_status.gfid_or_missing_entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ status = sh_status.metadata_self_heal;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ status = sh_status.entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ status = sh_status.data_self_heal;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ status = AFR_SELF_HEAL_NOT_ATTEMPTED;
+ break;
+ }
+ if (status == AFR_SELF_HEAL_FAILED)
+ sh_failed = 1;
+
+ }
+
+out:
+ return sh_failed;
+}
+
+char *
+get_sh_completion_status (afr_self_heal_status status)
+{
+
+ char *not_attempted = " is not attempted";
+ char *failed = " failed";
+ char *started = " is started";
+ char *sync_begin = " is successfully completed";
+ char *result = " has unknown status";
+
+ switch (status)
+ {
+ case AFR_SELF_HEAL_NOT_ATTEMPTED:
+ result = not_attempted;
+ break;
+ case AFR_SELF_HEAL_FAILED:
+ result = failed;
+ break;
+ case AFR_SELF_HEAL_STARTED:
+ result = started;
+ break;
+ case AFR_SELF_HEAL_SYNC_BEGIN:
+ result = sync_begin;
+ break;
+ }
+
+ return result;
+
+}
+
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t loglvl)
+{
+
+ char sh_log[4096] = {0};
+ afr_self_heal_t *sh = &local->self_heal;
+ afr_sh_status_for_all_type all_status = sh->afr_all_sh_status;
+ xlator_t *this = NULL;
+ size_t off = 0;
+ int data_sh = 0;
+ int metadata_sh = 0;
+ int print_log = 0;
+
+ this = THIS;
+
+ ADD_FMT_STRING (sh_log, off, "gfid or missing entry",
+ all_status.gfid_or_missing_entry_self_heal, print_log);
+ ADD_FMT_STRING_SYNC (sh_log, off, "metadata",
+ all_status.metadata_self_heal, print_log);
+ if (sh->background) {
+ ADD_FMT_STRING_SYNC (sh_log, off, "backgroung data",
+ all_status.data_self_heal, print_log);
+ } else {
+ ADD_FMT_STRING_SYNC (sh_log, off, "foreground data",
+ all_status.data_self_heal, print_log);
+ }
+ ADD_FMT_STRING_SYNC (sh_log, off, "entry", all_status.entry_self_heal,
+ print_log);
+
+ if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.data_self_heal &&
+ strcmp (sh->data_sh_info, "") && sh->data_sh_info )
+ data_sh = 1;
+ if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.metadata_self_heal &&
+ strcmp (sh->metadata_sh_info, "") && sh->metadata_sh_info)
+ metadata_sh = 1;
+
+ if (!print_log)
+ return;
+
+ gf_log (this->name, loglvl, "%s %s %s on %s", sh_log,
+ ((data_sh == 1) ? sh->data_sh_info : ""),
+ ((metadata_sh == 1) ? sh->metadata_sh_info : ""),
+ local->loc.path);
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index b7a736a74..473264776 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -1,33 +1,18 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_COMMON_H__
#define __AFR_SELF_HEAL_COMMON_H__
#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
-
-typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
- AFR_SELF_HEAL_INVALID = -1,
-} afr_self_heal_type;
+#define AFR_SH_MIN_PARTICIPANTS 2
typedef enum {
AFR_LOOKUP_FAIL_CONFLICTS = 1,
@@ -43,6 +28,10 @@ afr_sh_source_count (int sources[], int child_count);
void
afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
+void
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc);
+
int
afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
unsigned char *ignorant_subvols,
@@ -60,7 +49,7 @@ afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
int32_t *success_children, int32_t *subvol_status);
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type);
@@ -90,7 +79,7 @@ afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
int
afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int
afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
int active_src, struct iatt *buf,
@@ -100,16 +89,15 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
char *base_name, afr_lock_cbk_t lock_cbk);
int
afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent);
+ int child_index);
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
afr_lock_cbk_t lock_cbk);
afr_local_t *
-afr_local_copy (afr_local_t *l, xlator_t *this);
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this);
int
afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len,
+ off_t start, off_t len, gf_boolean_t block, char *dom,
afr_lock_cbk_t success_handler,
afr_lock_cbk_t failure_handler);
void
@@ -119,12 +107,38 @@ afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
typedef int
(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr);
+ dict_t *xattr, dict_t *xdata);
int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name,
- uuid_t gfid);
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
int
afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, int ret_child, mode_t entry_mode,
- call_frame_t **impunge_frame);
+ int active_source, call_frame_t **impunge_frame);
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this);
+
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+ int *intersection, unsigned int child_count);
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+ struct iatt *bufs);
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this));
+
+void
+afr_set_local_for_unhealable (afr_local_t *local);
+
+int
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type);
+
+void
+afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status);
+
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl);
+
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this);
#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 83920c081..9de26ee56 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -49,6 +40,14 @@
#include "afr-self-heal-common.h"
#include "afr-self-heal-algorithm.h"
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this);
+
+static inline gf_boolean_t
+afr_sh_data_proceed (unsigned int success_count)
+{
+ return (success_count >= AFR_SH_MIN_PARTICIPANTS);
+}
extern int
sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
@@ -63,15 +62,6 @@ int
afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
- afr_fxattrop_cbk_t fxattrop_cbk);
-
-int
-afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr);
-
-int
afr_sh_data_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
@@ -88,7 +78,7 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this)
int
afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -101,7 +91,7 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_ERROR,
"flush failed on %s on subvolume %s: %s",
local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
@@ -131,6 +121,11 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
+ if (!sh->healing_fd) {
+ //This happens when file is non-reg
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
call_count = afr_set_elem_count_get (sh->success,
priv->child_count);
local->call_count = call_count;
@@ -151,7 +146,7 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- sh->healing_fd);
+ sh->healing_fd, NULL);
if (!--call_count)
break;
@@ -161,9 +156,28 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
}
int
+afr_sh_dom_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (sh->sh_dom_lock_held)
+ afr_sh_data_unlock (frame, this, priv->sh_domain,
+ afr_sh_data_close);
+ else
+ afr_sh_data_close (frame, this);
+ return 0;
+}
+
+int
afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -195,29 +209,20 @@ afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
+afr_sh_data_setattr (call_frame_t *frame, xlator_t *this, struct iatt* stbuf)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_self_heal_t *sh = NULL;
int i = 0;
int call_count = 0;
- int source = 0;
int32_t valid = 0;
- struct iatt stbuf = {0,};
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- source = sh->source;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
+ valid = (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
call_count = afr_set_elem_count_get (sh->success,
priv->child_count);
@@ -237,7 +242,7 @@ afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+ &local->loc, stbuf, valid, NULL);
if (!--call_count)
break;
@@ -249,7 +254,7 @@ afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
int
afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -259,9 +264,14 @@ afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
sh = &local->self_heal;
GF_ASSERT (sh->source == child_index);
- if (op_ret != -1)
+ if (op_ret != -1) {
sh->buf[child_index] = *buf;
- afr_sh_data_setattr (frame, this);
+ afr_sh_data_setattr (frame, this, buf);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "time-stamps after self-heal", local->loc.path);
+ afr_sh_data_fail (frame, this);
+ }
return 0;
}
@@ -286,37 +296,51 @@ afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
(void *) (long) sh->source,
priv->children[sh->source],
priv->children[sh->source]->fops->fstat,
- sh->healing_fd);
+ sh->healing_fd, NULL);
return 0;
}
//Fun fact, lock_cbk is being used for both lock & unlock
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
afr_lock_cbk_t lock_cbk)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
sh = &local->self_heal;
+ priv = this->private;
- GF_ASSERT (sh->data_lock_held);
-
- sh->data_lock_held = _gf_false;
+ if (strcmp (dom, this->name) == 0) {
+ sh->data_lock_held = _gf_false;
+ } else if (strcmp (dom, priv->sh_domain) == 0) {
+ sh->sh_dom_lock_held = _gf_false;
+ } else {
+ ret = -1;
+ goto out;
+ }
int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = dom;
afr_unlock (frame, this);
+out:
+ if (ret) {
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_cbk (frame, this);
+ }
return 0;
}
int
afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
sh = &local->self_heal;
@@ -325,9 +349,9 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
"finishing data selfheal of %s", local->loc.path);
if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
+ afr_sh_data_unlock (frame, this, this->name, afr_sh_dom_unlock);
else
- afr_sh_data_close (frame, this);
+ afr_sh_dom_unlock (frame, this);
return 0;
}
@@ -344,34 +368,49 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG,
"finishing failed data selfheal of %s", local->loc.path);
- sh->op_failed = 1;
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_data_finish (frame, this);
return 0;
}
int
afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
int call_count = 0;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child_index = (long) cookie;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Erasing of pending change "
+ "log failed on %s for subvol %s, reason: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
call_count = afr_frame_return (frame);
if (call_count == 0) {
- local = frame->local;
- sh = &local->self_heal;
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
if (!IA_ISREG (sh->type)) {
afr_sh_data_finish (frame, this);
goto out;
}
GF_ASSERT (sh->old_loop_frame);
- afr_sh_data_lock (frame, this, 0, 0,
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
afr_post_sh_big_lock_success,
afr_post_sh_big_lock_failure);
}
@@ -382,74 +421,95 @@ out:
int
afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+ return 0;
+}
+
+int
+afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ sh = &local->self_heal;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to fsync on "
+ "%s - %s", local->loc.path,
+ priv->children[child_index]->name, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ UNLOCK (&frame->lock);
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ }
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
- gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %"PRIu64,
- frame->root->lk_owner);
- afr_sh_print_pending_matrix (sh->delta_matrix, this);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_erase_pending (frame, this);
+ }
+ return 0;
+}
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
+/*
+ * Before erasing xattrs, make sure the data is written to disk
+ */
+int
+afr_sh_data_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
+ call_count = sh->active_sinks;
+ if (call_count == 0) {
+ afr_sh_data_erase_pending (frame, this);
+ return 0;
}
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
-
- GF_ASSERT (call_count);
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
+ if (!sh->success[i] || sh->sources[i])
continue;
- gf_log (this->name, GF_LOG_DEBUG,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
+ STACK_WIND_COOKIE (frame, afr_sh_data_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync,
+ sh->healing_fd, 1, NULL);
}
- GF_FREE (erase_xattr);
return 0;
}
-
static struct afr_sh_algorithm *
sh_algo_from_name (xlator_t *this, char *name)
{
int i = 0;
+ if (name == NULL)
+ goto out;
+
while (afr_self_heal_algorithms[i].name) {
if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
return &afr_self_heal_algorithms[i];
@@ -458,17 +518,22 @@ sh_algo_from_name (xlator_t *this, char *name)
i++;
}
+out:
return NULL;
}
static int
-sh_zero_byte_files_exist (afr_self_heal_t *sh, int child_count)
+sh_zero_byte_files_exist (afr_local_t *local, int child_count)
{
- int i;
- int ret = 0;
+ int i = 0;
+ int ret = 0;
+ afr_self_heal_t *sh = NULL;
+ sh = &local->self_heal;
for (i = 0; i < child_count; i++) {
+ if (!local->child_up[i] || sh->child_errno[i])
+ continue;
if (sh->buf[i].ia_size == 0) {
ret = 1;
break;
@@ -495,8 +560,7 @@ afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
if (algo == NULL) {
/* option not set, so fall back on heuristics */
- if ((local->enoent_count != 0)
- || sh_zero_byte_files_exist (sh, priv->child_count)
+ if (sh_zero_byte_files_exist (local, priv->child_count)
|| (sh->file_size <= (priv->data_self_heal_window_size *
this->ctx->page_size))) {
@@ -534,11 +598,12 @@ afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->algo_completion_cbk = afr_sh_data_erase_pending;
+ sh->algo_completion_cbk = afr_sh_data_fsync;
sh->algo_abort_cbk = afr_sh_data_fail;
sh_algo = afr_sh_data_pick_algo (frame, this);
+ sh->algo = sh_algo;
sh_algo->fn (frame, this);
return 0;
@@ -547,38 +612,46 @@ afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
int
afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
int call_count = 0;
int child_index = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- priv = this->private;
+ priv = this->private;
local = frame->local;
+ sh = &local->self_heal;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_INFO,
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
"ftruncate of %s on subvolume %s failed (%s)",
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- else
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
gf_log (this->name, GF_LOG_DEBUG,
"ftruncate of %s on subvolume %s completed",
local->loc.path,
priv->children[child_index]->name);
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_data_sync_prepare (frame, this);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_sync_prepare (frame, this);
+ }
return 0;
}
@@ -612,7 +685,8 @@ afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
+ sh->healing_fd, sh->file_size,
+ NULL);
if (!--call_count)
break;
@@ -626,6 +700,7 @@ afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
{
afr_private_t *priv = NULL;
int ret = 0;
+ int i = 0;
priv = this->private;
sh->source = afr_sh_select_source (sh->sources, priv->child_count);
@@ -634,6 +709,15 @@ afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
goto out;
}
+ /* detect changes not visible through pending flags -- JIC */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == sh->source || sh->child_errno[i])
+ continue;
+
+ if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[sh->source]))
+ sh->sources[i] = 0;
+ }
+
afr_reset_children (sh->fresh_children, priv->child_count);
afr_get_fresh_children (sh->success_children, sh->sources,
sh->fresh_children, priv->child_count);
@@ -643,72 +727,211 @@ out:
return ret;
}
-int
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+char*
+afr_get_sizes_str (afr_local_t *local, struct iatt *bufs, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sizes_str = NULL;
+ size_t off = 0;
+ char *fmt_str = "%llu bytes on %s, ";
+ char *child_down = " %s,";
+ char *child_unknown = " %s,";
+ int down_child_present = 0;
+ int down_count = 0;
+ int unknown_count = 0;
+ int unknown_child_present = 0;
+ char *down_subvol_1 = " down subvolume is ";
+ char *unknown_subvol_1 = " unknown subvolume is ";
+ char *down_subvol_2 = " down subvolumes are ";
+ char *unknown_subvol_2 = " unknown subvolumes are ";
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %"PRIu64,
- frame->root->lk_owner);
- nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION, NULL, _gf_false);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No self-heal needed for %s",
- local->loc.path);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ len += snprintf (num, sizeof (num), fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
+ } else if (local->child_up[i] == 0) {
+ len += snprintf (num, sizeof (num), child_down,
+ priv->children[i]->name);
+ if (!down_child_present)
+ down_child_present = 1;
+ down_count ++;
+ } else if (local->child_up[i] == -1) {
+ len += snprintf (num, sizeof (num), child_unknown,
+ priv->children[i]->name);
+ if (!unknown_child_present)
+ unknown_child_present = 1;
+ unknown_count++;
+ }
- afr_sh_data_finish (frame, this);
- return 0;
}
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
+ if (down_child_present) {
+ if (down_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_1);
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_1);
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to "
- "resolve conflicting data of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
+ len++;//for '\0'
- sh->sources[priv->favorite_child] = 1;
+ sizes_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
+ if (!sizes_str)
+ return NULL;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ off += snprintf (sizes_str + off, len - off, fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
+ }
}
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible "
- "split-brain). Please delete the file from all but "
- "the preferred subvolume.", local->loc.path);
+ if (down_child_present) {
+ if (down_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_1);
+ }
+ }
- local->govinda_gOvinda = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 0) {
+ off += snprintf (sizes_str + off, len - off, child_down,
+ priv->children[i]->name);
+ }
+ }
- afr_sh_data_fail (frame, this);
- return 0;
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_1);
+ }
}
- ret = afr_sh_inode_set_read_ctx (sh, this);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == -1) {
+ off += snprintf (sizes_str + off, len - off,
+ child_unknown,
+ priv->children[i]->name);
- afr_sh_data_fail (frame, this);
- return 0;
+ }
+ }
+
+ return sizes_str;
+}
+
+char*
+afr_get_sinks_str (xlator_t *this, afr_local_t *local, afr_self_heal_t *sh)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sinks_str = NULL;
+ char *temp_str = " to sinks ";
+ char *str_format = " %s,";
+ char off = 0;
+
+ priv = this->private;
+
+ len += snprintf (num, sizeof (num), "%s", temp_str);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ len += snprintf (num, sizeof (num), str_format,
+ priv->children[i]->name);
+ }
+ }
+
+ len ++;
+
+ sinks_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+
+ if (!sinks_str)
+ return NULL;
+
+ off += snprintf (sinks_str + off, len - off, "%s", temp_str);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ off += snprintf (sinks_str + off, len - off,
+ str_format,
+ priv->children[i]->name);
+ }
}
+ return sinks_str;
+
+}
+
+
+void
+afr_set_data_sh_info_str (afr_local_t *local, afr_self_heal_t *sh, xlator_t *this)
+{
+ char *pending_matrix_str = NULL;
+ char *sizes_str = NULL;
+ char *sinks_str = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+ this);
+ if (!pending_matrix_str)
+ pending_matrix_str = "";
+
+ sizes_str = afr_get_sizes_str (local, sh->buf, this);
+ if (!sizes_str)
+ sizes_str = "";
+
+ sinks_str = afr_get_sinks_str (this, local, sh);
+ if (!sinks_str)
+ sinks_str = "";
+
+ gf_asprintf (&sh->data_sh_info, " data self heal from %s %s with "
+ "%s data %s", priv->children[sh->source]->name, sinks_str,
+ sizes_str, pending_matrix_str);
+
+ if (pending_matrix_str && strcmp (pending_matrix_str, ""))
+ GF_FREE (pending_matrix_str);
+
+ if (sizes_str && strcmp (sizes_str, ""))
+ GF_FREE (sizes_str);
+}
+
+void
+afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+{
+ int source = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
source = sh->source;
sh->block_size = this->ctx->page_size;
sh->file_size = sh->buf[source].ia_size;
@@ -716,17 +939,9 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
if (FILE_HAS_HOLES (&sh->buf[source]))
sh->file_has_holes = 1;
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
-
- if (sh->background && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
+ if (sh->background && sh->unwind && !sh->unwound) {
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_SPECIFIC));
sh->unwound = _gf_true;
}
@@ -736,66 +951,120 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
"no active sinks for performing self-heal on file %s",
local->loc.path);
afr_sh_data_finish (frame, this);
- return 0;
+ return;
}
gf_log (this->name, GF_LOG_DEBUG,
"self-healing file %s from subvolume %s to %d other",
local->loc.path, priv->children[sh->source]->name,
sh->active_sinks);
- afr_sh_data_trim_sinks (frame, this);
- return 0;
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_sh_data_trim_sinks (frame, this);
}
-static void
-afr_destroy_pending_matrix (int32_t **pending_matrix, int32_t child_count)
+int
+afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int nsources = 0;
+ int ret = 0;
+ int *old_sources = NULL;
+ int tstamp_source = 0;
int i = 0;
- GF_ASSERT (child_count > 0);
- if (pending_matrix) {
- for (i = 0; i < child_count; i++) {
- if (pending_matrix[i])
- GF_FREE (pending_matrix[i]);
- }
- GF_FREE (pending_matrix);
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ if (sh->sync_done) {
+ //store sources before sync so that mtime can be set using the
+ //iatt buf from one of them.
+ old_sources = alloca (priv->child_count*sizeof (*old_sources));
+ memcpy (old_sources, sh->sources,
+ priv->child_count * sizeof (*old_sources));
}
-}
-static int32_t**
-afr_create_pending_matrix (int32_t child_count)
-{
- gf_boolean_t cleanup = _gf_false;
- int32_t **pending_matrix = NULL;
- int i = 0;
+ nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
+ sh->sources, sh->success_children,
+ AFR_DATA_TRANSACTION, NULL, _gf_true);
+ if ((nsources == -1)
+ && (priv->favorite_child != -1)
+ && (sh->child_errno[priv->favorite_child] == 0)) {
- GF_ASSERT (child_count > 0);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Picking favorite child %s as authentic source to "
+ "resolve conflicting data of %s",
+ priv->children[priv->favorite_child]->name,
+ local->loc.path);
- pending_matrix = GF_CALLOC (sizeof (*pending_matrix), child_count,
- gf_afr_mt_int32_t);
- if (NULL == pending_matrix)
- goto out;
- for (i = 0; i < child_count; i++) {
- pending_matrix[i] = GF_CALLOC (sizeof (**pending_matrix),
- child_count,
- gf_afr_mt_int32_t);
- if (NULL == pending_matrix[i]) {
- cleanup = _gf_true;
- goto out;
- }
+ sh->sources[priv->favorite_child] = 1;
+
+ nsources = afr_sh_source_count (sh->sources,
+ priv->child_count);
}
-out:
- if (_gf_true == cleanup) {
- afr_destroy_pending_matrix (pending_matrix, child_count);
- pending_matrix = NULL;
+
+ if (nsources == -1) {
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, SPB);
+
+ afr_sh_data_fail (frame, this);
+ return 0;
+ }
+
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, NO_SPB);
+
+ ret = afr_sh_inode_set_read_ctx (sh, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No active sources found.");
+
+ afr_sh_data_fail (frame, this);
+ return 0;
+ }
+
+ if (sh->sync_done) {
+ /* Perform setattr from one of the old_sources if possible
+ * Because only they have the correct mtime, the new sources
+ * (i.e. old sinks) have mtime from last writev in sync.
+ */
+ tstamp_source = sh->source;
+ for (i = 0; i < priv->child_count; i++) {
+ if (old_sources[i] && sh->sources[i])
+ tstamp_source = i;
+ }
+ afr_sh_data_setattr (frame, this, &sh->buf[tstamp_source]);
+ } else {
+ afr_set_data_sh_info_str (local, sh, this);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
+
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ afr_sh_data_fix (frame, this);
+ else
+ afr_sh_data_finish (frame, this);
}
- return pending_matrix;
+ return 0;
}
int
afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
dict_t **xattr,
- afr_transaction_type txn_type)
+ afr_transaction_type txn_type,
+ uuid_t gfid)
{
afr_private_t *priv = NULL;
int read_child = -1;
@@ -812,19 +1081,33 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
bufs = local->cont.lookup.bufs;
success_children = local->cont.lookup.success_children;
- pending_matrix = afr_create_pending_matrix (priv->child_count);
- if (NULL == pending_matrix)
- goto out;
-
+ pending_matrix = local->cont.lookup.pending_matrix;
sources = local->cont.lookup.sources;
memset (sources, 0, sizeof (*sources) * priv->child_count);
nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
sources, success_children, txn_type,
&subvol_status, _gf_false);
- if (subvol_status & SPLIT_BRAIN)
- gf_log (this->name, GF_LOG_WARNING, "%s: Possible split-brain",
+ if (subvol_status & SPLIT_BRAIN) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: Possible split-brain",
local->loc.path);
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ local->cont.lookup.possible_spb = _gf_true;
+ nsources = 1;
+ sources[success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child (this,
+ success_children,
+ bufs);
+ sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ break;
+ }
+ }
if (nsources < 0)
goto out;
@@ -834,39 +1117,18 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
priv->child_count,
prev_read_child,
config_read_child,
- sources);
+ sources,
+ priv->hash_mode, gfid);
out:
- afr_destroy_pending_matrix (pending_matrix, priv->child_count);
gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
read_child);
return read_child;
}
int
-afr_sh_data_special_file_fix (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count ; i++)
- if (1 == local->child_up[i])
- sh->success[i] = 1;
-
- afr_sh_data_erase_pending (frame, this);
-
- return 0;
-}
-
-int
afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -889,6 +1151,12 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
sh->buf[child_index] = *buf;
sh->success_children[sh->success_count] = child_index;
sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "%s: fstat failed "
+ "on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
}
}
UNLOCK (&frame->lock);
@@ -899,13 +1167,17 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
/* Previous versions of glusterfs might have set
* the pending data xattrs which need to be erased
*/
- if (IA_ISREG (buf->ia_type))
- afr_sh_data_fix (frame, this);
- else
- afr_sh_data_special_file_fix (frame, this);
-
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "inspecting metadata "
+ "succeeded on < %d children, aborting "
+ "self-heal for %s", AFR_SH_MIN_PARTICIPANTS,
+ local->loc.path);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ afr_sh_data_fxattrop_fstat_done (frame, this);
}
-
+out:
return 0;
}
@@ -916,33 +1188,41 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
afr_self_heal_t *sh = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
+ int call_count = 0;
+ int i = 0;
+ int child = 0;
+ int32_t *fstat_children = NULL;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
- call_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
+ fstat_children = memdup (sh->success_children,
+ sizeof (*fstat_children) * priv->child_count);
+ if (!fstat_children) {
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ call_count = sh->success_count;
local->call_count = call_count;
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
afr_reset_children (sh->success_children, priv->child_count);
sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fstat,
- sh->healing_fd);
-
- if (!--call_count)
- break;
- }
+ child = fstat_children[i];
+ if (child == -1)
+ break;
+ STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
+ (void *) (long) child,
+ priv->children[child],
+ priv->children[child]->fops->fstat,
+ sh->healing_fd, NULL);
+ --call_count;
}
-
+ GF_ASSERT (!call_count);
+out:
+ GF_FREE (fstat_children);
return 0;
}
@@ -971,73 +1251,60 @@ afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
sh->xattr[child_index] = dict_ref (xattr);
sh->success_children[sh->success_count] = child_index;
sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "fxattrop of %s "
+ "failed on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
}
}
UNLOCK (&frame->lock);
}
int
-afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
{
int call_count = -1;
- int ret = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
- op_errno, xattr);
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
- sh = &local->self_heal;
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- (void) afr_build_sources (this, sh->xattr, NULL,
- sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION, NULL, _gf_false);
- ret = afr_sh_inode_set_read_ctx (sh, this);
- if (ret)
- afr_sh_data_fail (frame, this);
- else
- afr_sh_set_timestamps (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- int call_count = -1;
+ sh = &local->self_heal;
afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
op_errno, xattr);
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s, inspecting "
+ "change log succeeded on < %d children",
+ local->loc.path, AFR_SH_MIN_PARTICIPANTS);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
afr_sh_data_fstat (frame, this);
}
-
+out:
return 0;
}
int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
- afr_fxattrop_cbk_t fxattrop_cbk)
+afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
{
afr_self_heal_t *sh = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
+ dict_t **xattr_req;
int32_t *zero_pending = NULL;
int call_count = 0;
int i = 0;
int ret = 0;
+ int j;
priv = this->private;
local = frame->local;
@@ -1048,42 +1315,53 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
local->call_count = call_count;
- xattr_req = dict_new();
- if (!xattr_req) {
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
- gf_afr_mt_int32_t);
- if (!zero_pending) {
- ret = -1;
- goto out;
- }
- ret = dict_set_dynptr (xattr_req, priv->pending_key[i],
- zero_pending,
- 3 * sizeof (*zero_pending));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value");
- goto out;
- } else {
- zero_pending = NULL;
- }
- }
+ xattr_req = GF_CALLOC(priv->child_count, sizeof(struct dict_t *),
+ gf_afr_mt_dict_t);
+ if (!xattr_req)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ xattr_req[i] = dict_new();
+ if (!xattr_req[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
+ gf_afr_mt_int32_t);
+ if (!zero_pending) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynptr (xattr_req[i], priv->pending_key[j],
+ zero_pending,
+ 3 * sizeof (*zero_pending));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value");
+ goto out;
+ } else {
+ zero_pending = NULL;
+ }
+ }
+ }
afr_reset_xattr (sh->xattr, priv->child_count);
afr_reset_children (sh->success_children, priv->child_count);
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, fxattrop_cbk,
+ STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fxattrop,
sh->healing_fd, GF_XATTROP_ADD_ARRAY,
- xattr_req);
+ xattr_req[i], NULL);
if (!--call_count)
break;
@@ -1091,14 +1369,16 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
}
out:
- if (xattr_req)
- dict_unref (xattr_req);
+ if (xattr_req) {
+ for (i = 0; i < priv->child_count; i++)
+ if (xattr_req[i])
+ dict_unref(xattr_req[i]);
+ GF_FREE(xattr_req);
+ }
if (ret) {
- if (zero_pending)
- GF_FREE (zero_pending);
- sh->op_failed = 1;
- afr_sh_data_done (frame, this);
+ GF_FREE (zero_pending);
+ afr_sh_data_fail (frame, this);
}
return 0;
@@ -1114,7 +1394,23 @@ afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
sh->data_lock_held = _gf_true;
- afr_sh_data_fxattrop (frame, this, afr_sh_data_fxattrop_cbk);
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
+
+int
+afr_sh_dom_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_dom_lock_held = _gf_true;
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_sh_data_big_lock_success,
+ afr_sh_data_fail);
return 0;
}
@@ -1131,14 +1427,16 @@ afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
- "failed for %s. by %"PRIu64,
- local->loc.path, frame->root->lk_owner);
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
sh->data_lock_failure_handler (frame, this);
} else {
gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
- "done for %s by %"PRIu64". Proceding to self-heal",
- local->loc.path, frame->root->lk_owner);
+ "done for %s by %s. Proceding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
sh->data_lock_success_handler (frame, this);
}
@@ -1158,15 +1456,21 @@ afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "failed for %s. by %"PRIu64,
- local->loc.path, frame->root->lk_owner);
- int_lock->lock_cbk = afr_sh_data_post_blocking_inodelk_cbk;
- afr_blocking_lock (frame, this);
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ if (!sh->data_lock_block) {
+ sh->data_lock_failure_handler(frame, this);
+ } else {
+ int_lock->lock_cbk =
+ afr_sh_data_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
+ }
} else {
gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "done for %s by %"PRIu64". Proceeding to self-heal",
- local->loc.path, frame->root->lk_owner);
+ "done for %s by %s. Proceeding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
sh->data_lock_success_handler (frame, this);
}
@@ -1174,9 +1478,11 @@ afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t len)
+afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, char *dom,
+ off_t start, off_t len)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
local = frame->local;
@@ -1187,11 +1493,14 @@ afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t le
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = start;
- int_lock->lk_flock.l_len = len;
- int_lock->lk_flock.l_type = F_WRLCK;
int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
+ int_lock->domain = dom;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->flock.l_start = start;
+ inodelk->flock.l_len = len;
+ inodelk->flock.l_type = F_WRLCK;
+
afr_nonblocking_inodelk (frame, this);
return 0;
@@ -1210,7 +1519,8 @@ afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
sh_loop_finish (sh->old_loop_frame, this);
sh->old_loop_frame = NULL;
sh->data_lock_held = _gf_true;
- afr_sh_data_fxattrop (frame, this, afr_post_sh_data_fxattrop_cbk);
+ sh->sync_done = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
return 0;
}
@@ -1233,8 +1543,8 @@ afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
int
afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len,
- afr_lock_cbk_t success_handler,
+ off_t start, off_t len, gf_boolean_t block,
+ char *dom, afr_lock_cbk_t success_handler,
afr_lock_cbk_t failure_handler)
{
afr_local_t * local = NULL;
@@ -1245,12 +1555,13 @@ afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
sh->data_lock_success_handler = success_handler;
sh->data_lock_failure_handler = failure_handler;
- return afr_sh_data_lock_rec (frame, this, start, len);
+ sh->data_lock_block = block;
+ return afr_sh_data_lock_rec (frame, this, dom, start, len);
}
int
afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -1276,20 +1587,20 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "open of %s succeeded on child %s",
+ local->loc.path,
+ priv->children[child_index]->name);
}
-
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s succeeded on child %s",
- local->loc.path,
- priv->children[child_index]->name);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_data_fail (frame, this);
return 0;
}
@@ -1298,9 +1609,8 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"fd for %s opened, commencing sync",
local->loc.path);
- afr_sh_data_lock (frame, this, 0, 0,
- afr_sh_data_big_lock_success,
- afr_sh_data_fail);
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, priv->sh_domain,
+ afr_sh_dom_lock_success, afr_sh_data_fail);
}
return 0;
@@ -1337,7 +1647,7 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->open,
&local->loc,
- O_RDWR|O_LARGEFILE, fd, 0);
+ O_RDWR|O_LARGEFILE, fd, NULL);
if (!--call_count)
break;
@@ -1346,20 +1656,93 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
return 0;
}
+void
+afr_sh_non_reg_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ if (op_ret < 0) {
+ afr_sh_data_fail (frame, this);
+ return;
+ }
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count ; i++) {
+ if (1 == local->child_up[i])
+ sh->success[i] = 1;
+ }
+
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+}
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this)
+afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
local = frame->local;
sh = &local->self_heal;
+ sh->data_lock_held = _gf_true;
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_non_reg_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ return 0;
+}
+gf_boolean_t
+afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
if (sh->do_data_self_heal &&
- afr_data_self_heal_enabled (priv->data_self_heal)) {
- afr_sh_data_open (frame, this);
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ return _gf_true;
+ return _gf_false;
+}
+
+int
+afr_self_heal_data (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = this->private;
+ int ret = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
+
+ if (afr_can_start_data_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[1],
+ priv->sh_domain, priv->child_count);
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
+
+ if (IA_ISREG (sh->type)) {
+ afr_sh_data_open (frame, this);
+ } else {
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true,
+ this->name,
+ afr_sh_non_reg_lock_success,
+ afr_sh_data_fail);
+ }
} else {
gf_log (this->name, GF_LOG_TRACE,
"not doing data self heal on %s",
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index ba29656e2..53491a1d7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -59,6 +50,9 @@
} while (0);
int
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index);
+int
afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
@@ -67,10 +61,6 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
-
sh->completion_cbk (frame, this);
return 0;
@@ -112,7 +102,7 @@ afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
int
afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
long i = 0;
int call_count = 0;
@@ -167,72 +157,20 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
- int need_unwind = 0;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
if (sh->entries_skipped) {
- need_unwind = 1;
- sh->op_failed = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
goto out;
}
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- if (call_count == 0)
- need_unwind = 1;
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
+ afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION,
+ afr_sh_entry_erase_pending_cbk,
+ afr_sh_entry_finish);
+ return 0;
out:
- if (need_unwind)
- afr_sh_entry_finish (frame, this);
-
+ afr_sh_entry_finish (frame, this);
return 0;
}
@@ -317,8 +255,7 @@ int
afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this);
int
afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
@@ -346,7 +283,8 @@ int
afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *expunge_local = NULL;
@@ -382,7 +320,7 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *expunge_local = NULL;
@@ -410,7 +348,6 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
}
valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- afr_build_parent_loc (&expunge_sh->parent_loc, &expunge_local->loc);
STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
(void *) (long) active_src,
@@ -418,7 +355,7 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
priv->children[active_src]->fops->setattr,
&expunge_sh->parent_loc,
&expunge_sh->parentbuf,
- valid);
+ valid, NULL);
return 0;
}
@@ -442,7 +379,7 @@ afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->unlink,
- &expunge_local->loc);
+ &expunge_local->loc, 0, NULL);
return 0;
}
@@ -467,7 +404,7 @@ afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->rmdir,
- &expunge_local->loc, 1);
+ &expunge_local->loc, 1, NULL);
return 0;
}
@@ -591,7 +528,7 @@ afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->lookup,
- &expunge_local->loc, 0);
+ &expunge_local->loc, NULL);
return 0;
}
@@ -643,7 +580,8 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
if (need_expunge) {
gf_log (this->name, GF_LOG_INFO,
- "missing entry %s on %s",
+ "Entry %s is missing on %s and deleting from "
+ "replica's other bricks",
expunge_local->loc.path,
priv->children[source]->name);
@@ -675,6 +613,19 @@ out:
return 0;
}
+static gf_boolean_t
+can_skip_entry_self_heal (char *name, loc_t *parent_loc)
+{
+ if (strcmp (name, ".") == 0) {
+ return _gf_true;
+ } else if (strcmp (name, "..") == 0) {
+ return _gf_true;
+ } else if (loc_is_root (parent_loc) &&
+ (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
int
afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
@@ -702,15 +653,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
sh->expunge_done = afr_sh_entry_expunge_entry_done;
name = entry->d_name;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
+ if (can_skip_entry_self_heal (name, &local->loc)) {
op_ret = 0;
goto out;
}
@@ -725,16 +668,17 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
expunge_frame->local = expunge_local;
expunge_sh = &expunge_local->self_heal;
expunge_sh->sh_frame = frame;
expunge_sh->active_source = active_src;
expunge_sh->entrybuf = entry->d_stat;
+ loc_copy (&expunge_sh->parent_loc, &local->loc);
ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
- name, entry->d_stat.ia_gfid);
+ name);
if (ret != 0) {
op_errno = EINVAL;
goto out;
@@ -749,7 +693,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
(void *) (long) source,
priv->children[source],
priv->children[source]->fops->lookup,
- &expunge_local->loc, 0);
+ &expunge_local->loc, NULL);
ret = 0;
out:
@@ -764,7 +708,7 @@ int
afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -832,7 +776,7 @@ afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
priv->children[active_src],
priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
return 0;
}
@@ -862,7 +806,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_sink (frame, this, sh->active_source);
sh->active_source = active_src;
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
goto out;
}
@@ -887,15 +831,19 @@ out:
int
afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_ret, int32_t op_errno)
{
int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ sh->entries_skipped = _gf_true;
call_count = afr_frame_return (frame);
-
if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ afr_sh_entry_impunge_subvol (frame, this);
return 0;
}
@@ -909,22 +857,20 @@ afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
afr_self_heal_t *sh = NULL;
afr_self_heal_t *impunge_sh = NULL;
call_frame_t *frame = NULL;
- int32_t impunge_ret_child = 0;
AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
frame, local, sh);
- impunge_ret_child = impunge_sh->impunge_ret_child;
AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
+ sh->impunge_done (frame, this, op_ret, op_errno);
}
int
afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
int call_count = 0;
afr_private_t *priv = NULL;
@@ -936,7 +882,7 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
child_index = (long) cookie;
if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"setattr done for %s on %s",
impunge_local->loc.path,
priv->children[child_index]->name);
@@ -948,31 +894,114 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
strerror (op_errno));
}
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ 0, op_errno);
}
- UNLOCK (&impunge_frame->lock);
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
+ return 0;
+}
+
+int
+afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ afr_local_t *setattr_local = NULL;
+
+ setattr_local = setattr_frame->local;
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr on parent directory (%s) failed: %s",
+ setattr_local->loc.path, strerror (op_errno));
+ }
+ call_count = afr_frame_return (setattr_frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (setattr_frame);
return 0;
}
+int
+afr_sh_entry_impunge_setattr (call_frame_t *impunge_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *setattr_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *setattr_frame = NULL;
+ int32_t valid = 0;
+ int32_t op_errno = 0;
+ int child_index = 0;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setting ownership of %s on %s to %d/%d",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ impunge_sh->entrybuf.ia_uid,
+ impunge_sh->entrybuf.ia_gid);
+
+ setattr_frame = copy_frame (impunge_frame);
+ if (!setattr_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ AFR_LOCAL_ALLOC_OR_GOTO (setattr_frame->local, out);
+ setattr_local = setattr_frame->local;
+ call_count = afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0);
+ loc_copy (&setattr_local->loc, &impunge_sh->parent_loc);
+ impunge_local->call_count = call_count;
+ setattr_local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (impunge_sh->child_errno[i])
+ continue;
+ valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (setattr_frame,
+ afr_sh_entry_impunge_parent_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &setattr_local->loc,
+ &impunge_sh->parentbuf, valid, NULL);
+
+ valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (impunge_frame,
+ afr_sh_entry_impunge_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &impunge_local->loc,
+ &impunge_sh->entrybuf, valid, NULL);
+ call_count--;
+ }
+ GF_ASSERT (!call_count);
+ return 0;
+out:
+ if (setattr_frame)
+ AFR_STACK_DESTROY (setattr_frame);
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, op_errno);
+ return 0;
+}
int
afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
int child_index = 0;
- struct iatt stbuf = {0};
- int32_t valid = 0;
priv = this->private;
impunge_local = impunge_frame->local;
@@ -985,55 +1014,59 @@ afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
impunge_local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
+ goto out;
}
- gf_log (this->name, GF_LOG_TRACE,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_local->cont.lookup.buf.ia_uid,
- impunge_local->cont.lookup.buf.ia_gid);
-
- stbuf.ia_atime = impunge_local->cont.lookup.buf.ia_atime;
- stbuf.ia_atime_nsec = impunge_local->cont.lookup.buf.ia_atime_nsec;
- stbuf.ia_mtime = impunge_local->cont.lookup.buf.ia_mtime;
- stbuf.ia_mtime_nsec = impunge_local->cont.lookup.buf.ia_mtime_nsec;
-
- stbuf.ia_uid = impunge_local->cont.lookup.buf.ia_uid;
- stbuf.ia_gid = impunge_local->cont.lookup.buf.ia_gid;
-
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_setattr_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &impunge_local->loc,
- &stbuf, valid);
+ afr_sh_entry_impunge_setattr (impunge_frame, this);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
return 0;
}
-
int
-afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
+ xlator_t *this)
{
- loc_t *parent_loc = cookie;
+ int active_src = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr on parent directory (%s) failed: %s",
- parent_loc->path, strerror (op_errno));
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ afr_prepare_new_entry_pending_matrix (impunge_local->pending,
+ afr_is_errno_unset,
+ impunge_sh->child_errno,
+ &impunge_sh->entrybuf,
+ priv->child_count);
+ xattr = dict_new ();
+ if (!xattr) {
+ op_errno = ENOMEM;
+ goto out;
}
- loc_wipe (parent_loc);
+ afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src,
+ LOCAL_LAST);
- GF_FREE (parent_loc);
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->xattrop,
+ &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL);
- AFR_STACK_DESTROY (setattr_frame);
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
return 0;
}
@@ -1043,124 +1076,165 @@ afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int call_count = 0;
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
int child_index = 0;
- int32_t *pending_array = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- int idx = 0;
- call_frame_t *setattr_frame = NULL;
- int32_t valid = 0;
- loc_t *parent_loc = NULL;
- struct iatt parentbuf = {0,};
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
child_index = (long) cookie;
if (op_ret == -1) {
- ret = -1;
+ impunge_sh->child_errno[child_index] = op_errno;
gf_log (this->name, GF_LOG_ERROR,
"creation of %s on %s failed (%s)",
impunge_local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- goto out;
+ } else {
+ impunge_sh->child_errno[child_index] = 0;
}
- inode->ia_type = stbuf->ia_type;
-
- xattr = dict_new ();
- if (!xattr) {
- ret = -1;
- goto out;
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ if (!afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0)) {
+ // new_file creation failed every where
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
+ goto out;
+ }
+ afr_sh_entry_impunge_perform_xattrop (impunge_frame, this);
}
+out:
+ return 0;
+}
- pending_array = (int32_t*) GF_CALLOC (3, sizeof (*pending_array),
- gf_afr_mt_int32_t);
+int
+afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int call_count = 0;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
- if (!pending_array) {
- ret = -1;
- goto out;
- }
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
- /* Pending data xattrs shouldn't be set for special files
- */
- idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
- if (IA_ISDIR (stbuf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else if (IA_ISREG (stbuf->ia_type))
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- else
- goto cont;
- pending_array[idx] = hton32 (1);
-
-cont:
- ret = dict_set_dynptr (xattr, priv->pending_key[child_index],
- pending_array,
- 3 * sizeof (*pending_array));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- } else {
- pending_array = NULL;
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ //For symlinks impunge is attempted un-conditionally
+ //So the file can already exist.
+ if ((op_ret < 0) && (op_errno == EEXIST))
+ op_ret = 0;
}
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parentbuf = impunge_sh->parentbuf;
- setattr_frame = copy_frame (impunge_frame);
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc),
- gf_afr_mt_loc_t);
- if (!parent_loc) {
- ret = -1;
- goto out;
- }
- afr_build_parent_loc (parent_loc, &impunge_local->loc);
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr);
+ return 0;
+}
- STACK_WIND_COOKIE (setattr_frame, afr_sh_entry_impunge_parent_setattr_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &parentbuf, valid);
+int
+afr_sh_entry_impunge_hardlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ loc_t *loc = NULL;
+ struct iatt *buf = NULL;
+ loc_t oldloc = {0};
-out:
- if (xattr)
- dict_unref (xattr);
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc = &impunge_local->loc;
+ buf = &impunge_sh->entrybuf;
- if (ret) {
- if (pending_array)
- GF_FREE (pending_array);
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, buf->ia_gfid);
+ gf_log (this->name, GF_LOG_DEBUG, "linking missing file %s on %s",
+ loc->path, priv->children[child_index]->name);
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_hardlink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->link,
+ &oldloc, loc, NULL);
+ loc_wipe (&oldloc);
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- }
+ return 0;
+}
+int
+afr_sh_nameless_lookup_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ if (op_ret < 0) {
+ afr_sh_entry_impunge_create_file (impunge_frame, this,
+ (long)cookie);
+ } else {
+ afr_sh_entry_impunge_hardlink (impunge_frame, this,
+ (long)cookie);
+ }
return 0;
}
+int
+afr_sh_entry_impunge_check_hardlink (call_frame_t *impunge_frame,
+ xlator_t *this,
+ int child_index, struct iatt *stbuf)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ loc_t *loc = NULL;
+ dict_t *xattr_req = NULL;
+ loc_t oldloc = {0};
+ int ret = -1;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ loc = &impunge_local->loc;
+
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ goto out;
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, stbuf->ia_gfid);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_nameless_lookup_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lookup,
+ &oldloc, xattr_req);
+ ret = 0;
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+ loc_wipe (&oldloc);
+ if (ret)
+ sh->impunge_done (frame, this, -1, ENOMEM);
+ return 0;
+}
int
afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
@@ -1189,6 +1263,35 @@ afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
impunge_local->loc.path);
+ /*
+ * Reason for adding GLUSTERFS_INTERNAL_FOP_KEY :
+ *
+ * Problem:
+ * While a brick is down in a replica pair, lets say the user creates
+ * one file(file-A) and a hard link to that file(h-file-A). After the
+ * brick comes back up, entry self-heal is attempted on parent dir of
+ * these two files. As part of readdir in self-heal it reads both the
+ * entries file-A and h-file-A for both of them it does name less lookup
+ * to check if there are any hardlinks already present in the
+ * destination brick. It finds that there are no hard links already
+ * present for files file-A, h-file-A. Self-heal does mknods for both
+ * file-A and h-file-A. This leads to file-A and h-file-A not being
+ * hardlinks anymore.
+ *
+ * Fix: (More like shrinking of race-window, the race itself is still
+ * present in posix-mknod).
+ * If mknod comes with the presence of GLUSTERFS_INTERNAL_FOP_KEY then
+ * posix_mknod checks if there are already any gfid-links and does
+ * link() instead of mknod. There still can be a race where two
+ * posix_mknods same gfid see that
+ * gfid-link file is not present and proceeds with mknods and result in
+ * two different files with same gfid.
+ */
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: %s set failed",
+ impunge_local->loc.path, GLUSTERFS_INTERNAL_FOP_KEY);
+
STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
(void *) (long) child_index,
priv->children[child_index],
@@ -1196,7 +1299,7 @@ afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
&impunge_local->loc,
st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
makedev (ia_major (stbuf->ia_rdev),
- ia_minor (stbuf->ia_rdev)), dict);
+ ia_minor (stbuf->ia_rdev)), 0, dict);
if (dict)
dict_unref (dict);
@@ -1243,7 +1346,7 @@ afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
priv->children[child_index]->fops->mkdir,
&impunge_local->loc,
st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- dict);
+ 0, dict);
if (dict)
dict_unref (dict);
@@ -1265,7 +1368,7 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
priv = this->private;
impunge_local = impunge_frame->local;
- buf = &impunge_local->cont.symlink.buf;
+ buf = &impunge_local->cont.dir_fop.buf;
dict = dict_new ();
if (!dict) {
@@ -1290,7 +1393,7 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc, dict);
+ linkname, &impunge_local->loc, 0, dict);
if (dict)
dict_unref (dict);
@@ -1304,7 +1407,7 @@ afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
@@ -1365,7 +1468,7 @@ afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->unlink,
- &impunge_local->loc);
+ &impunge_local->loc, 0, NULL);
return 0;
}
@@ -1375,7 +1478,7 @@ int
afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
@@ -1457,7 +1560,7 @@ afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->readlink,
- &impunge_local->loc, 4096);
+ &impunge_local->loc, 4096, NULL);
return 0;
}
@@ -1467,7 +1570,7 @@ int
afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
@@ -1525,36 +1628,84 @@ afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
active_src = impunge_sh->active_source;
- impunge_local->cont.symlink.buf = *stbuf;
+ impunge_local->cont.dir_fop.buf = *stbuf;
STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
(void *) (long) child_index,
priv->children[active_src],
priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096);
+ &impunge_local->loc, 4096, NULL);
return 0;
}
int
afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent)
+ int child_index)
{
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
ia_type_t type = IA_INVAL;
- int ret = 0;
int active_src = 0;
+ struct iatt *buf = NULL;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->parentbuf = *postparent;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
active_src = impunge_sh->active_source;
- impunge_local->cont.lookup.buf = *buf;
- afr_update_loc_gfids (&impunge_local->loc, buf, postparent);
+ afr_update_loc_gfids (&impunge_local->loc, &impunge_sh->entrybuf,
+ &impunge_sh->parentbuf);
+
+ buf = &impunge_sh->entrybuf;
+ type = buf->ia_type;
+
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ afr_sh_entry_impunge_check_hardlink (impunge_frame, this,
+ child_index, buf);
+ break;
+ case IA_IFDIR:
+ afr_sh_entry_impunge_mkdir (impunge_frame, this,
+ child_index, buf);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
+ impunge_local->loc.path,
+ priv->children[active_src]->name, type);
+ sh->impunge_done (frame, this, -1, EINVAL);
+ break;
+ }
+
+ return 0;
+}
+
+int
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ ia_type_t type = IA_INVAL;
+ int active_src = 0;
+ struct iatt *buf = NULL;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+ buf = &impunge_sh->entrybuf;
type = buf->ia_type;
switch (type) {
@@ -1570,41 +1721,30 @@ afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
afr_sh_entry_impunge_readlink (impunge_frame, this,
child_index, buf);
break;
- case IA_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
default:
gf_log (this->name, GF_LOG_ERROR,
"%s has unknown file type on %s: 0%o",
impunge_local->loc.path,
priv->children[active_src]->name, type);
- ret = -1;
+ sh->impunge_done (frame, this, -1, EINVAL);
break;
}
- return ret;
+ return 0;
}
gf_boolean_t
-afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources,
- unsigned int child, unsigned int child_count)
+afr_sh_need_recreate (afr_self_heal_t *impunge_sh, unsigned int child,
+ unsigned int child_count)
{
- int32_t *success_children = NULL;
gf_boolean_t recreate = _gf_false;
- GF_ASSERT (impunge_sh->impunging_entry_mode);
GF_ASSERT (impunge_sh->child_errno);
- GF_ASSERT (sources);
- success_children = impunge_sh->success_children;
- if (child == impunge_sh->active_source) {
- GF_ASSERT (afr_is_child_present (success_children,
- child_count, child));
+ if (child == impunge_sh->active_source)
goto out;
- }
- if (IA_ISLNK (impunge_sh->impunging_entry_mode)) {
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
recreate = _gf_true;
goto out;
}
@@ -1623,7 +1763,7 @@ afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
int i = 0;
for (i = 0; i < child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, sources, i, child_count))
+ if (afr_sh_need_recreate (impunge_sh, i, child_count))
count++;
}
@@ -1640,8 +1780,6 @@ afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
call_frame_t *frame = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
unsigned int recreate_count = 0;
int i = 0;
int active_src = 0;
@@ -1649,24 +1787,34 @@ afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
priv = this->private;
AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
frame, local, sh);
- active_src = impunge_sh->active_source;
- buf = &impunge_sh->buf[active_src];
- postparent = &impunge_sh->parentbufs[active_src];
-
+ active_src = impunge_sh->active_source;
+ impunge_sh->entrybuf = impunge_sh->buf[active_src];
+ impunge_sh->parentbuf = impunge_sh->parentbufs[active_src];
recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
priv->child_count);
- GF_ASSERT (recreate_count);
+ if (!recreate_count) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, 0);
+ goto out;
+ }
impunge_local->call_count = recreate_count;
for (i = 0; i < priv->child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, sh->sources, i,
- priv->child_count)) {
- (void)afr_sh_entry_impunge_create (impunge_frame, this,
- i, buf,
- postparent);
- recreate_count--;
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
}
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count))
+ continue;
+ (void)afr_sh_entry_impunge_create (impunge_frame, this, i);
+ recreate_count--;
}
GF_ASSERT (!recreate_count);
+out:
return 0;
}
@@ -1680,7 +1828,6 @@ afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
call_frame_t *frame = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- unsigned int recreate_count = 0;
unsigned int gfid_miss_count = 0;
unsigned int children_up_count = 0;
uuid_t gfid = {0};
@@ -1729,15 +1876,9 @@ afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
afr_sh_entry_common_lookup_done, gfid,
AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
} else {
- recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
- priv->child_count);
- if (!recreate_count) {
- op_ret = 0;
- op_errno = 0;
- goto done;
- }
afr_sh_entry_call_impunge_recreate (impunge_frame, this);
}
return;
@@ -1753,13 +1894,13 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
int ret = -1;
call_frame_t *impunge_frame = NULL;
afr_local_t *impunge_local = NULL;
int active_src = 0;
int op_errno = 0;
int op_ret = -1;
- mode_t entry_mode = 0;
local = frame->local;
sh = &local->self_heal;
@@ -1767,14 +1908,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
active_src = sh->active_source;
sh->impunge_done = afr_sh_entry_impunge_entry_done;
- if ((strcmp (entry->d_name, ".") == 0)
- || (strcmp (entry->d_name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- entry->d_name, local->loc.path);
+ if (can_skip_entry_self_heal (entry->d_name, &local->loc)) {
op_ret = 0;
goto out;
}
@@ -1783,18 +1917,18 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
"inspecting existence of %s under %s",
entry->d_name, local->loc.path);
- entry_mode = st_mode_from_ia (entry->d_stat.ia_prot,
- entry->d_stat.ia_type);
- ret = afr_impunge_frame_create (frame, this, active_src, active_src,
- entry_mode, &impunge_frame);
+ ret = afr_impunge_frame_create (frame, this, active_src,
+ &impunge_frame);
if (ret) {
op_errno = -ret;
goto out;
}
impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
- entry->d_name, entry->d_stat.ia_gfid);
+ entry->d_name);
+ loc_copy (&impunge_sh->parent_loc, &local->loc);
if (ret != 0) {
op_errno = ENOMEM;
goto out;
@@ -1802,14 +1936,14 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
afr_sh_entry_common_lookup_done, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS);
+ AFR_LOOKUP_FAIL_CONFLICTS, NULL);
op_ret = 0;
out:
if (ret) {
if (impunge_frame)
AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, active_src, op_ret, op_errno);
+ sh->impunge_done (frame, this, op_ret, op_errno);
}
return 0;
@@ -1820,7 +1954,7 @@ int
afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -1843,6 +1977,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
local->loc.path,
priv->children[active_src]->name,
strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"readdir of %s on subvolume %s complete",
@@ -1859,7 +1994,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
entry_count++;
}
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"readdir'ed %d entries from %s",
entry_count, priv->children[active_src]->name);
@@ -1875,21 +2010,24 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
+ int32_t active_src = 0;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
+ active_src = sh->active_source;
+ gf_log (this->name, GF_LOG_DEBUG, "%s: readdir from offset %zd",
+ local->loc.path, sh->offset);
STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
priv->children[active_src],
priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
return 0;
}
@@ -1912,7 +2050,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_source (frame, this, sh->active_source);
sh->active_source = active_src;
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -1927,7 +2065,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
"impunging entries of %s on %s to other sinks",
local->loc.path, priv->children[active_src]->name);
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ afr_sh_entry_impunge_subvol (frame, this);
return 0;
}
@@ -1935,7 +2073,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
int
afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -1961,7 +2099,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
}
UNLOCK (&frame->lock);
@@ -1969,7 +2107,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -2007,7 +2145,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
source = local->self_heal.source;
sources = local->self_heal.sources;
- sh->block_size = 65536; //131072
+ sh->block_size = priv->sh_readdir_size;
sh->offset = 0;
call_count = sh->active_sinks;
@@ -2029,7 +2167,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
(void *) (long) source,
priv->children[source],
priv->children[source]->fops->opendir,
- &local->loc, fd);
+ &local->loc, fd, NULL);
call_count--;
}
@@ -2046,7 +2184,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- &local->loc, fd);
+ &local->loc, fd, NULL);
if (!--call_count)
break;
@@ -2101,6 +2239,8 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
"merging all entries as a conservative decision",
local->loc.path);
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
afr_sh_entry_open (frame, this);
return 0;
@@ -2123,7 +2263,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
priv = this->private;
if (op_ret < 0) {
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_entry_finish (frame, this);
goto out;
@@ -2186,7 +2326,7 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
"failed for %s.", local->loc.path);
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_entry_done (frame, this);
} else {
@@ -2195,7 +2335,8 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
afr_sh_common_lookup (frame, this, &local->loc,
afr_sh_entry_fix, NULL,
AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
}
return 0;
@@ -2204,14 +2345,18 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
+ afr_self_heal_t *sh = NULL;
priv = this->private;
local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_entrylk (frame, this, &local->loc, NULL,
afr_sh_post_nonblocking_entry_cbk);
} else {
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 148e858bd..fd5da6cfd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -54,31 +45,17 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
- memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
- afr_reset_xattr (sh->xattr, priv->child_count);
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_INFO,
- "split-brain detected, aborting selfheal of %s",
+ afr_sh_reset (frame, this);
+ if (IA_ISDIR (sh->type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "proceeding to entry check on %s",
local->loc.path);
- sh->op_failed = 1;
- sh->completion_cbk (frame, this);
+ afr_self_heal_entry (frame, this);
} else {
- if (IA_ISDIR (sh->type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
gf_log (this->name, GF_LOG_DEBUG,
"proceeding to data check on %s",
local->loc.path);
@@ -111,11 +88,24 @@ afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
return 0;
}
+int
+afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_metadata_finish (frame, this);
+ return 0;
+}
int
afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = 0;
@@ -147,85 +137,19 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
return 0;
}
-
int
afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
- if (!erase_xattr)
- return -ENOMEM;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- return 0;
+ afr_sh_erase_pending (frame, this, AFR_METADATA_TRANSACTION,
+ afr_sh_metadata_erase_pending_cbk,
+ afr_sh_metadata_finish);
+ return 0;
}
int
afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -256,8 +180,13 @@ afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
- if (call_count == 0)
+ if (call_count == 0) {
+ if (local->xattr_req) {
+ dict_unref (local->xattr_req);
+ local->xattr_req = NULL;
+ }
afr_sh_metadata_erase_pending (frame, this);
+ }
return 0;
}
@@ -266,9 +195,9 @@ afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
@@ -276,13 +205,93 @@ afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+afr_sh_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret < 0) {
+ afr_sh_metadata_sync_cbk (frame, cookie,
+ this, -1, op_errno, xdata);
+ goto out;
+ }
+
+ i = (long) cookie;
+
+ STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc, local->xattr_req, 0, NULL);
+
+ out:
return 0;
}
+inline void
+afr_prune_special_keys (dict_t *xattr_dict)
+{
+ dict_del (xattr_dict, GF_SELINUX_XATTR_KEY);
+}
+
+inline void
+afr_prune_pending_keys (dict_t *xattr_dict, afr_private_t *priv)
+{
+ int i = 0;
+
+ for (; i < priv->child_count; i++) {
+ dict_del (xattr_dict, priv->pending_key[i]);
+ }
+}
+
+int
+afr_sh_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret < 0) {
+ afr_sh_metadata_sync_cbk (frame, cookie,
+ this, -1, op_errno, xdata);
+ goto out;
+ }
+
+ afr_prune_pending_keys (xattr, priv);
+
+ afr_prune_special_keys (xattr);
+
+ i = (long) cookie;
+
+ /* send removexattr in bulk via xdata */
+ STACK_WIND_COOKIE (frame, afr_sh_removexattr_cbk,
+ cookie,
+ priv->children[i],
+ priv->children[i]->fops->removexattr,
+ &local->loc, "", xattr);
+
+ out:
+ return 0;
+}
int
afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
@@ -308,9 +317,10 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
/*
* 2 calls per sink - setattr, setxattr
*/
- if (xattr)
+ if (xattr) {
call_count = active_sinks * 2;
- else
+ local->xattr_req = dict_ref (xattr);
+ } else
call_count = active_sinks;
local->call_count = call_count;
@@ -346,18 +356,18 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+ &local->loc, &stbuf, valid, NULL);
call_count--;
if (!xattr)
continue;
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+ STACK_WIND_COOKIE (frame, afr_sh_getxattr_cbk,
(void *) (long) i,
priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0);
+ priv->children[i]->fops->getxattr,
+ &local->loc, NULL, NULL);
call_count--;
}
@@ -366,17 +376,15 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int source = 0;
- int i;
-
local = frame->local;
sh = &local->self_heal;
priv = this->private;
@@ -391,16 +399,147 @@ afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
afr_sh_metadata_sync (frame, this, NULL);
} else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
-
+ afr_prune_pending_keys (xattr, priv);
afr_sh_metadata_sync (frame, this, xattr);
}
return 0;
}
+static void
+afr_set_metadata_sh_info_str (afr_local_t *local, afr_self_heal_t *sh,
+ xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *string = NULL;
+ size_t off = 0;
+ char *source_child = " from source %s to";
+ char *format = " %s, ";
+ char *string_msg = " metadata self heal";
+ char *pending_matrix_str = NULL;
+ int down_child_present = 0;
+ int unknown_child_present = 0;
+ char *down_subvol_1 = " down subvolume is ";
+ char *unknown_subvol_1 = " unknown subvolume is";
+ char *down_subvol_2 = " down subvolumes are ";
+ char *unknown_subvol_2 = " unknown subvolumes are ";
+ int down_count = 0;
+ int unknown_count = 0;
+
+ priv = this->private;
+
+ pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+ this);
+
+ if (!pending_matrix_str)
+ pending_matrix_str = "";
+
+ len += snprintf (num, sizeof (num), "%s", string_msg);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->source == i) && (local->child_up[i] == 1)) {
+ len += snprintf (num, sizeof (num), source_child,
+ priv->children[i]->name);
+ } else if ((local->child_up[i] == 1) && (sh->sources[i] == 0)) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ } else if (local->child_up[i] == 0) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ if (!down_child_present)
+ down_child_present = 1;
+ down_count++;
+ } else if (local->child_up[i] == -1) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ if (!unknown_child_present)
+ unknown_child_present = 1;
+ unknown_count++;
+ }
+ }
+
+ if (down_child_present) {
+ if (down_count > 1) {
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_2);
+ } else {
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_1);
+ }
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_2);
+ } else {
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_1);
+ }
+ }
+
+ len ++;
+
+ string = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+ if (!string)
+ return;
+
+ off += snprintf (string + off, len - off, "%s", string_msg);
+ for (i=0; i < priv->child_count; i++) {
+ if ((sh->source == i) && (local->child_up[i] == 1))
+ off += snprintf (string + off, len - off, source_child,
+ priv->children[i]->name);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((local->child_up[i] == 1)&& (sh->sources[i] == 0))
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ if (down_child_present) {
+ if (down_count > 1) {
+ off += snprintf (string + off, len - off, "%s",
+ down_subvol_2);
+ } else {
+ off += snprintf (string + off, len - off, "%s",
+ down_subvol_1);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 0)
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ off += snprintf (string + off, len - off, "%s",
+ unknown_subvol_2);
+ } else {
+ off += snprintf (string + off, len - off, "%s",
+ unknown_subvol_1);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == -1)
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ gf_asprintf (&sh->metadata_sh_info, "%s metadata %s,", string,
+ pending_matrix_str);
+
+ if (pending_matrix_str && strcmp (pending_matrix_str, ""))
+ GF_FREE (pending_matrix_str);
+
+ if (string && strcmp (string, ""))
+ GF_FREE (string);
+}
int
afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
@@ -430,10 +569,13 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
local->loc.path, priv->children[source]->name,
sh->active_sinks);
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_set_metadata_sh_info_str (local, sh, this);
STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
priv->children[source],
priv->children[source]->fops->getxattr,
- &local->loc, NULL);
+ &local->loc, NULL, NULL);
return 0;
}
@@ -455,7 +597,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
priv = this->private;
if (op_ret < 0) {
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_metadata_finish (frame, this);
goto out;
@@ -464,15 +606,6 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
sh->pending_matrix, sh->sources,
sh->success_children,
AFR_METADATA_TRANSACTION, NULL, _gf_false);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
@@ -489,12 +622,18 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
}
if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, SPB, DONT_KNOW);
+ afr_sh_metadata_fail (frame, this);
+ goto out;
+ }
- local->govinda_gOvinda = 1;
+ afr_set_split_brain (this, sh->inode, NO_SPB, DONT_KNOW);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
afr_sh_metadata_finish (frame, this);
goto out;
@@ -533,7 +672,10 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
sh->fresh_children);
}
- afr_sh_metadata_sync_prepare (frame, this);
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ afr_sh_metadata_sync_prepare (frame, this);
+ else
+ afr_sh_metadata_finish (frame, this);
out:
return;
}
@@ -549,9 +691,9 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
int_lock = &local->internal_lock;
if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking metadata "
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
"inodelks failed for %s.", local->loc.path);
- gf_log (this->name, GF_LOG_ERROR, "Metadata self-heal "
+ gf_log (this->name, GF_LOG_DEBUG, "Metadata self-heal "
"failed for %s.", local->loc.path);
afr_sh_metadata_done (frame, this);
} else {
@@ -562,7 +704,8 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
afr_sh_common_lookup (frame, this, &local->loc,
afr_sh_metadata_fix, NULL,
AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
}
return 0;
@@ -572,19 +715,22 @@ int
afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
local = frame->local;
int_lock = &local->internal_lock;
+ int_lock->domain = this->name;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = LLONG_MAX - 1;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
+ inodelk->flock.l_start = LLONG_MAX - 1;
+ inodelk->flock.l_len = 0;
+ inodelk->flock.l_type = F_WRLCK;
int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
afr_nonblocking_inodelk (frame, this);
@@ -592,17 +738,29 @@ afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
return 0;
}
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ return _gf_true;
+ return _gf_false;
+}
int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = this->private;
-
+ afr_self_heal_t *sh = &local->self_heal;
local = frame->local;
+ sh = &local->self_heal;
+ sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
- if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) {
+ if (afr_can_start_metadata_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_metadata_lock (frame, this);
} else {
afr_sh_metadata_done (frame, this);
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index f40c06faa..7c9bc8111 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_H__
@@ -30,13 +21,6 @@
#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this);
-int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this);
-int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this);
-
-int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
int
@@ -54,5 +38,6 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
int
afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
dict_t **xattr,
- afr_transaction_type txn_type);
+ afr_transaction_type txn_type,
+ uuid_t gfid);
#endif /* __AFR_SELF_HEAL_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index c75b7fa92..1b48a1bca 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -25,9 +16,1076 @@
#include "syncop.h"
#include "afr-self-heald.h"
#include "afr-self-heal-common.h"
+#include "protocol-common.h"
+#include "event-history.h"
+
+typedef enum {
+ STOP_CRAWL_ON_SINGLE_SUBVOL = 1
+} afr_crawl_flags_t;
+
+typedef enum {
+ HEAL = 1,
+ INFO,
+ STATISTICS_TO_BE_HEALED,
+} shd_crawl_op;
+
+typedef struct shd_dump {
+ dict_t *dict;
+ xlator_t *this;
+ int child;
+} shd_dump_t;
+
+typedef struct shd_event_ {
+ int child;
+ char *path;
+} shd_event_t;
+
+typedef struct shd_pos_ {
+ int child;
+ xlator_t *this;
+ afr_child_pos_t pos;
+} shd_pos_t;
+
+typedef int
+(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done);
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+
+/* For calling straight through (e.g. already in a synctask). */
+int
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos);
+
+/* For deferring through a new synctask. */
+int
+afr_syncop_find_child_position (void *data);
+
+static int
+_loc_assign_gfid_path (loc_t *loc)
+{
+ int ret = -1;
+ char gfid_path[64] = {0};
+
+ if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
+ ret = inode_path (loc->inode, NULL, (char**)&loc->path);
+ } else if (!uuid_is_null (loc->gfid)) {
+ snprintf (gfid_path, sizeof (gfid_path), "<gfid:%s>",
+ uuid_utoa (loc->gfid));
+ loc->path = gf_strdup (gfid_path);
+ if (loc->path)
+ ret = 0;
+ }
+ return ret;
+}
+
+void
+_destroy_crawl_event_data (void *data)
+{
+ shd_crawl_event_t *crawl_event = NULL;
+
+ if (!data)
+ goto out;
+
+ crawl_event = (shd_crawl_event_t *)data;
+ GF_FREE (crawl_event->start_time_str);
+ GF_FREE (crawl_event->end_time_str);
+
+out:
+ return;
+}
+
+void
+_destroy_shd_event_data (void *data)
+{
+ shd_event_t *event = NULL;
+ if (!data)
+ goto out;
+ event = (shd_event_t*)data;
+ GF_FREE (event->path);
+out:
+ return;
+}
+void
+shd_cleanup_event (void *event)
+{
+ shd_event_t *shd_event = event;
+
+ if (!shd_event)
+ goto out;
+ GF_FREE (shd_event->path);
+ GF_FREE (shd_event);
+out:
+ return;
+}
+
+int
+afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+{
+ int i = 0;
+ int ret = -1;
+ for (i = 0; i < child_count; i++) {
+ if (shd->pos[i] == AFR_POS_LOCAL) {
+ ret = i;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int
+_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+{
+ int ret = 0;
+
+ uuid_copy (loc->pargfid, parent->inode->gfid);
+ loc->path = "";
+ loc->name = name;
+ loc->parent = inode_ref (parent->inode);
+ if (!loc->parent) {
+ loc->path = NULL;
+ loc_wipe (loc);
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+_add_crawl_stats_to_dict (xlator_t *this, dict_t *output, int child,
+ shd_crawl_event_t *shd_event, struct timeval *tv)
+{
+ int ret = 0;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = NULL;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+
+ healed_count = shd_event->healed_count;
+ split_brain_count = shd_event->split_brain_count;
+ heal_failed_count = shd_event->heal_failed_count;
+ start_time_str = shd_event->start_time_str;
+ end_time_str = shd_event->end_time_str;
+ crawl_type = shd_event->crawl_type;
+
+ if (!start_time_str) {
+ ret = -1;
+ goto out;
+ }
+
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "healed_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, split_brain_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "split_brain_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, gf_strdup (crawl_type));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_type to output");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, heal_failed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "healed_failed_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, gf_strdup(start_time_str));
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_start_time to outout");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ if (!end_time_str)
+ end_time_str = "Could not determine the end time";
+ ret = dict_set_dynstr (output, key, gf_strdup(end_time_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_end_time to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ if (shd_event->crawl_inprogress == _gf_true)
+ progress = 1;
+ else
+ progress = 0;
+
+ ret = dict_set_int32 (output, key, progress);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "inprogress to outout");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics-%d-%d-count",xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment the "
+ "counter.");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+_add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path,
+ struct timeval *tv, gf_boolean_t dyn)
+{
+ //subkey not used for now
+ int ret = -1;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
+ if (dyn)
+ ret = dict_set_dynstr (output, key, path);
+ else
+ ret = dict_set_str (output, key, path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+ path);
+ goto out;
+ }
+
+ if (!tv)
+ goto inc_count;
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
+ child, count);
+ ret = dict_set_uint32 (output, key, tv->tv_sec);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time",
+ path);
+ goto out;
+ }
+
+inc_count:
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
+ char **fpath, gf_boolean_t *missing)
+{
+ dict_t *xattr = NULL;
+ char *path = NULL;
+ int ret = -1;
+
+ ret = syncop_getxattr (readdir_xl, child, &xattr, GFID_TO_PATH_KEY);
+ if (ret < 0) {
+ if ((errno == ENOENT) && missing)
+ *missing = _gf_true;
+ goto out;
+ }
+ ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get path for "
+ "gfid %s", uuid_utoa (child->gfid));
+ goto out;
+ }
+ path = gf_strdup (path);
+ if (!path) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (!ret)
+ *fpath = path;
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+int
+_add_event_to_dict (circular_buffer_t *cb, void *data)
+{
+ int ret = 0;
+ shd_dump_t *dump_data = NULL;
+ shd_event_t *shd_event = NULL;
+
+ dump_data = data;
+ shd_event = cb->data;
+ if (shd_event->child != dump_data->child)
+ goto out;
+ ret = _add_path_to_dict (dump_data->this, dump_data->dict,
+ dump_data->child, shd_event->path, &cb->tv,
+ _gf_false);
+out:
+ return ret;
+}
+
+int
+_add_crawl_event_statistics_to_dict (circular_buffer_t *cb, void *data)
+{
+ int ret = 0;
+ shd_dump_t *dump_data = NULL;
+ shd_crawl_event_t *shd_event = NULL;
+
+ dump_data = data;
+ shd_event = cb->data;
+ ret = _add_crawl_stats_to_dict (dump_data->this, dump_data->dict,
+ dump_data->child, shd_event, &cb->tv);
+ return ret;
+}
+
+int
+_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child)
+{
+ shd_dump_t dump_data = {0};
+
+ dump_data.this = this;
+ dump_data.dict = dict;
+ dump_data.child = child;
+ eh_dump (eh, &dump_data, _add_event_to_dict);
+ return 0;
+}
+
+
+int
+_add_statistics_to_dict (xlator_t *this, dict_t *dict, int child)
+{
+ shd_dump_t dump_data = {0};
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ dump_data.this = this;
+ dump_data.dict = dict;
+ dump_data.child = child;
+ eh_dump (shd->statistics[child], &dump_data,
+ _add_crawl_event_statistics_to_dict);
+ return 0;
+
+}
+
+void
+_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
+ loc_t *parent, char *fname)
+{
+ int ret = 0;
+ loc_t index_loc = {0};
+
+ ret = _build_index_loc (this, &index_loc, fname, parent);
+ if (ret)
+ goto out;
+ gf_log (this->name, GF_LOG_DEBUG, "Removing stale index "
+ "for %s on %s", index_loc.name, readdir_xl->name);
+ ret = syncop_unlink (readdir_xl, &index_loc);
+ if(ret && (errno != ENOENT)) {
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to remove index "
+ "on %s - %s",index_loc.name, readdir_xl->name,
+ strerror (errno));
+ }
+ index_loc.path = NULL;
+ loc_wipe (&index_loc);
+out:
+ return;
+}
+
+int
+_count_hard_links_under_base_indices_dir (xlator_t *this,
+ afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *childloc,
+ loc_t *parentloc, struct iatt *iattr)
+{
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+ struct iatt parent = {0};
+ int ret = 0;
+ dict_t *output = NULL;
+ int xl_id = 0;
+ char key[256] = {0};
+ int child = -1;
+ uint64_t hardlinks = 0;
+
+ output = crawl_data->op_data;
+ child = crawl_data->child;
+
+ ret = syncop_lookup (readdir_xl, childloc, NULL, iattr, NULL, &parent);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "%d-%d-hardlinks", xl_id, child);
+ ret = dict_get_uint64 (output, key, &hardlinks);
+
+ /*Removing the count of base_entry under indices/base_indicies and
+ * entry under indices/xattrop */
+ hardlinks = hardlinks + iattr->ia_nlink - 2;
+ ret = dict_set_uint64 (output, key, hardlinks);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+int
+_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry,
+ loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+{
+ dict_t *output = NULL;
+ xlator_t *readdir_xl = NULL;
+ int ret = -1;
+ char *path = NULL;
+ gf_boolean_t missing = _gf_false;
+ char gfid_str[64] = {0};
+
+ if (uuid_is_null (childloc->gfid))
+ goto out;
+
+ output = crawl_data->op_data;
+ readdir_xl = crawl_data->readdir_xl;
+
+ ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path,
+ &missing);
+ if (ret == 0) {
+ ret = _add_path_to_dict (this, output, crawl_data->child, path,
+ NULL, _gf_true);
+ } else if (missing) {
+ _remove_stale_index (this, readdir_xl, parentloc,
+ uuid_utoa_r (childloc->gfid, gfid_str));
+ }
+
+out:
+ if (ret && path)
+ GF_FREE (path);
+ return ret;
+}
+
+void
+_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp,
+ afr_crawl_data_t *crawl_data)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ eh_t *eh = NULL;
+ char *path = NULL;
+ char gfid_str[64] = {0};
+ shd_event_t *event = NULL;
+ int32_t sh_failed = 0;
+ gf_boolean_t split_brain = 0;
+ int32_t actual_sh_done = 0;
+ shd_crawl_event_t **shd_crawl_event = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+ if (crawl_data->crawl == INDEX) {
+ if ((op_ret < 0) && (op_errno == ENOENT)) {
+ _remove_stale_index (this, crawl_data->readdir_xl,
+ parent, uuid_utoa_r (child->gfid,
+ gfid_str));
+ goto out;
+ }
+ ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
+ child, &path, NULL);
+ if (ret)
+ goto out;
+ } else {
+ path = gf_strdup (child->path);
+ if (!path) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (xattr_rsp) {
+ ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
+ ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done);
+ }
+
+ shd_crawl_event = (shd_crawl_event_t**)(shd->crawl_events);
+
+ split_brain = afr_is_split_brain (this, child->inode);
+ if ((op_ret < 0 && op_errno == EIO) || split_brain) {
+ eh = shd->split_brain;
+ shd_crawl_event[crawl_data->child]->split_brain_count += 1;
+ } else if ((op_ret < 0) || sh_failed) {
+ eh = shd->heal_failed;
+ shd_crawl_event[crawl_data->child]->heal_failed_count += 1;
+ } else if (actual_sh_done == 1) {
+ eh = shd->healed;
+ shd_crawl_event[crawl_data->child]->healed_count += 1;
+ }
+ ret = -1;
+
+ if (eh != NULL) {
+ event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
+ if (!event)
+ goto out;
+ event->child = crawl_data->child;
+ event->path = path;
+
+ ret = eh_save_history (eh, event);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save "
+ "to event history, (%d, %s)", path, op_ret,
+ strerror (op_errno));
+
+ goto out;
+ }
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ",
+ path);
+
+ }
+ ret = 0;
+out:
+ if (ret && path)
+ GF_FREE (path);
+ return;
+}
+
+int
+_link_inode_update_loc (xlator_t *this, loc_t *loc, struct iatt *iattr)
+{
+ inode_t *link_inode = NULL;
+ int ret = -1;
+
+ link_inode = inode_link (loc->inode, NULL, NULL, iattr);
+ if (link_inode == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "inode link failed "
+ "on the inode (%s)", uuid_utoa (iattr->ia_gfid));
+ goto out;
+ }
+ inode_unref (loc->inode);
+ loc->inode = link_inode;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
+ loc_t *child, loc_t *parent, struct iatt *iattr)
+{
+ struct iatt parentbuf = {0};
+ int ret = 0;
+ dict_t *xattr_rsp = NULL;
+ dict_t *xattr_req = NULL;
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (xattr_req, "allow-sh-for-running-transaction", 1);
+
+ gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
+
+ ret = syncop_lookup (this, child, xattr_req,
+ iattr, &xattr_rsp, &parentbuf);
+ _crawl_post_sh_action (this, parent, child, ret, errno, xattr_rsp,
+ crawl_data);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+ if (ret == 0)
+ ret = _link_inode_update_loc (this, child, iattr);
+
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ return ret;
+}
+
+static int
+afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ GF_FREE (data);
+ STACK_DESTROY (sync_frame->root);
+ return 0;
+}
+
+void
+_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+ afr_start_crawl (this, child, crawl, _self_heal_entry,
+ NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL,
+ afr_crawl_done);
+}
+
+gf_boolean_t
+_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ gf_boolean_t proceed = _gf_false;
+ char *msg = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+ if (!shd->enabled) {
+ msg = "Self-heal daemon is not enabled";
+ gf_log (this->name, GF_LOG_DEBUG, "%s", msg);
+ goto out;
+ }
+ if (!priv->child_up[child]) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , "
+ "subvol went down", priv->children[child]->name);
+ msg = "Brick is Not connected";
+ goto out;
+ }
+
+ if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
+ if (afr_up_children_count (priv->child_up,
+ priv->child_count) < 2) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl as "
+ "< 2 children are up");
+ msg = "< 2 bricks in replica are running";
+ goto out;
+ }
+ }
+ proceed = _gf_true;
+out:
+ if (reason)
+ *reason = msg;
+ return proceed;
+}
+
+int
+_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+ shd_crawl_op op, dict_t *output)
+{
+ afr_private_t *priv = NULL;
+ char *status = NULL;
+ char *subkey = NULL;
+ char key[256] = {0};
+ shd_pos_t pos_data = {0};
+ int op_ret = -1;
+ int xl_id = -1;
+ int i = 0;
+ int ret = 0;
+ int crawl_flags = 0;
+
+ priv = this->private;
+ if (op == HEAL)
+ crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+ if (output) {
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid input, "
+ "translator-id is not available");
+ goto out;
+ }
+ }
+ pos_data.this = this;
+ subkey = "status";
+ for (i = 0; i < priv->child_count; i++) {
+ if (_crawl_proceed (this, i, crawl_flags, &status)) {
+ pos_data.child = i;
+ /*
+ * We're already in a synctask in this case, so we
+ * don't need to defer through a second (and in fact
+ * that can cause deadlock). Just call straight
+ * through instead.
+ */
+ ret = afr_find_child_position(pos_data.this,
+ pos_data.child,
+ &pos_data.pos);
+ if (ret) {
+ status = "Not able to find brick location";
+ } else if (pos_data.pos == AFR_POS_REMOTE) {
+ status = "brick is remote";
+ } else {
+ op_ret = 0;
+ if (op == HEAL) {
+ status = "Started self-heal";
+ _do_self_heal_on_subvol (this, i,
+ crawl);
+ } else if (output && (op == INFO)) {
+ status = "";
+ afr_start_crawl (this, i, INDEX,
+ _add_summary_to_dict,
+ output, _gf_false, 0,
+ NULL);
+ } else if (output &&
+ (op == STATISTICS_TO_BE_HEALED)) {
+ status = "";
+ afr_start_crawl (this, i,
+ INDEX_TO_BE_HEALED,
+ _count_hard_links_under_base_indices_dir,
+ output, _gf_false,
+ 0, NULL);
+ }
+ }
+ if (output) {
+ snprintf (key, sizeof (key), "%d-%d-%s", xl_id,
+ i, subkey);
+ ret = dict_set_str (output, key, status);
+ }
+ if (!op_ret && (crawl == FULL))
+ break;
+ }
+ if (output) {
+ snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i,
+ subkey);
+ ret = dict_set_str (output, key, status);
+ }
+ }
+out:
+ return op_ret;
+}
+
+int
+_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+ dict_t *output)
+{
+ return _do_crawl_op_on_local_subvols (this, crawl, HEAL, output);
+}
+
+int
+_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+{
+ return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output);
+}
+
+void
+afr_fill_completed_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+ priv = this->private;
+ shd= &priv->shd;
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+ _add_statistics_to_dict (this, dict, i);
+ }
+
+ return ;
+}
+
+static void
+reset_crawl_event (shd_crawl_event_t *crawl_event)
+{
+ crawl_event->healed_count = 0;
+ crawl_event->split_brain_count = 0;
+ crawl_event->heal_failed_count = 0;
+ GF_FREE (crawl_event->start_time_str);
+ crawl_event->start_time_str = NULL;
+ crawl_event->end_time_str = NULL;
+ crawl_event->crawl_type = NULL;
+ crawl_event->crawl_inprogress = _gf_false;
+ return;
+}
+
+static void
+afr_copy_crawl_event_struct (shd_crawl_event_t *src, shd_crawl_event_t *dst)
+{
+ dst->healed_count = src->healed_count;
+ dst->split_brain_count = src->split_brain_count;
+ dst->heal_failed_count = src->heal_failed_count;
+ dst->start_time_str = gf_strdup (src->start_time_str);
+ dst->end_time_str = "Crawl is already in progress";
+ dst->crawl_type = src->crawl_type;
+ dst->crawl_inprogress = _gf_true;
+ return;
+}
+
+static int
+afr_fill_crawl_statistics_of_running_crawl(xlator_t *this, dict_t *dict)
+{
+ shd_crawl_event_t *evnt = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+ priv = this->private;
+ shd = &priv->shd;
+
+ evnt = GF_CALLOC (1, sizeof (shd_crawl_event_t),
+ gf_afr_mt_shd_crawl_event_t);
+ if (!evnt) {
+ ret = -1;
+ goto out;
+ }
+ LOCK (&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+
+ reset_crawl_event (evnt);
+
+ if (!shd->crawl_events[i]) {
+ continue;
+ }
+
+ afr_copy_crawl_event_struct (shd->crawl_events[i],
+ evnt);
+ _add_crawl_stats_to_dict (this, dict, i, evnt, NULL);
+
+ }
+ }
+ UNLOCK (&priv->lock);
+ reset_crawl_event (evnt);
+ GF_FREE (evnt);
+
+out:
+ return ret;
+}
static int
-_crawl_directory (loc_t *loc, pid_t pid);
+_add_local_subvols_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+ int ret = 0;
+ afr_fill_completed_crawl_statistics_to_dict (this, dict);
+ ret = afr_fill_crawl_statistics_of_running_crawl (this, dict);
+ return ret;
+}
+int
+_add_local_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+ _add_eh_to_dict (this, eh, dict, i);
+ }
+ return 0;
+}
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int xl_id = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+ switch (op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ ret = _do_self_heal_on_local_subvols (this, INDEX, output);
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ ret = _do_self_heal_on_local_subvols (this, FULL, output);
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ (void)_get_index_summary_on_local_subvols (this, output);
+ ret = 0;
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->healed, output);
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->heal_failed,
+ output);
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->split_brain,
+ output);
+ break;
+ case GF_AFR_OP_STATISTICS:
+ ret = _add_local_subvols_crawl_statistics_to_dict (this, output);
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
+ STATISTICS_TO_BE_HEALED,
+ output);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
+ break;
+ }
+out:
+ dict_del (output, this->name);
+ return ret;
+}
+
+void
+afr_poll_self_heal (void *data)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct timespec timeout = {0};
+ xlator_t *this = NULL;
+ long child = (long)data;
+ gf_timer_t *old_timer = NULL;
+ gf_timer_t *new_timer = NULL;
+ shd_pos_t pos_data = {0};
+ int ret = 0;
+
+ this = THIS;
+ priv = this->private;
+ shd = &priv->shd;
+
+ if (shd->pos[child] == AFR_POS_UNKNOWN) {
+ pos_data.this = this;
+ pos_data.child = child;
+ ret = synctask_new (this->ctx->env,
+ afr_syncop_find_child_position,
+ NULL, NULL, &pos_data);
+ if (!ret)
+ shd->pos[child] = pos_data.pos;
+ }
+ if (shd->enabled && (shd->pos[child] == AFR_POS_LOCAL))
+ _do_self_heal_on_subvol (this, child, INDEX);
+ timeout.tv_sec = shd->timeout;
+ timeout.tv_nsec = 0;
+ //notify and previous timer should be synchronized.
+ LOCK (&priv->lock);
+ {
+ old_timer = shd->timer[child];
+ if (shd->pos[child] == AFR_POS_REMOTE)
+ goto unlock;
+ shd->timer[child] = gf_timer_call_after (this->ctx, timeout,
+ afr_poll_self_heal,
+ data);
+ new_timer = shd->timer[child];
+ }
+unlock:
+ UNLOCK (&priv->lock);
+
+ if (old_timer)
+ gf_timer_call_cancel (this->ctx, old_timer);
+ if (!new_timer && (shd->pos[child] != AFR_POS_REMOTE)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not create self-heal polling timer for %s",
+ priv->children[child]->name);
+ }
+ return;
+}
+
+static int
+afr_handle_child_up (int ret, call_frame_t *sync_frame, void *data)
+{
+ afr_self_heald_t *shd = NULL;
+ shd_pos_t *pos_data = data;
+ afr_private_t *priv = NULL;
+
+ if (ret)
+ goto out;
+
+ priv = pos_data->this->private;
+ shd = &priv->shd;
+ shd->pos[pos_data->child] = pos_data->pos;
+ if (pos_data->pos != AFR_POS_REMOTE)
+ afr_poll_self_heal ((void*)(long)pos_data->child);
+ _do_self_heal_on_local_subvols (THIS, INDEX, NULL);
+out:
+ GF_FREE (data);
+ return 0;
+}
+
+void
+afr_proactive_self_heal (void *data)
+{
+ xlator_t *this = NULL;
+ long child = (long)data;
+ shd_pos_t *pos_data = NULL;
+ int ret = 0;
+
+ this = THIS;
+
+ //Position of brick could have changed and it could be local now.
+ //Compute the position again
+ pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t);
+ if (!pos_data)
+ goto out;
+ pos_data->this = this;
+ pos_data->child = child;
+ ret = synctask_new (this->ctx->env, afr_syncop_find_child_position,
+ afr_handle_child_up, NULL, pos_data);
+ if (ret)
+ goto out;
+out:
+ return;
+}
+
static int
get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
{
@@ -84,30 +1142,206 @@ out:
return ret;
}
-inline void
-afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+int
+afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
+ loc_t *dirloc)
{
- afr_update_loc_gfids (loc, iatt, parent);
- uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
+ void *base_indices_holder_vgfid = NULL;
+ loc_t rootloc = {0};
+ struct iatt iattr = {0};
+ struct iatt parent = {0};
+ int ret = 0;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+
+ priv = this->private;
+ if (crawl_data->crawl == FULL) {
+ afr_build_root_loc (this, dirloc);
+ } else if (crawl_data->crawl == INDEX) {
+ afr_build_root_loc (this, &rootloc);
+ ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+ GF_XATTROP_INDEX_GFID);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get index "
+ "dir gfid on %s", readdir_xl->name);
+ goto out;
+ }
+ if (!index_gfid) {
+ gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+ "on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ uuid_copy (dirloc->gfid, index_gfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new (priv->root_inode->table);
+ ret = syncop_lookup (readdir_xl, dirloc, NULL,
+ &iattr, NULL, &parent);
+ if (ret < 0) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR, "lookup "
+ "failed on index dir on %s - (%s)",
+ readdir_xl->name, strerror (errno));
+ }
+ goto out;
+ }
+ ret = _link_inode_update_loc (this, dirloc, &iattr);
+ if (ret)
+ goto out;
+ } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ afr_build_root_loc (this, &rootloc);
+ ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+ GF_BASE_INDICES_HOLDER_GFID);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_ptr (xattr, GF_BASE_INDICES_HOLDER_GFID,
+ &base_indices_holder_vgfid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+ "on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ if (!base_indices_holder_vgfid) {
+ gf_log (this->name, GF_LOG_ERROR, "Base indices holder"
+ "virtual gfid is null on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ uuid_copy (dirloc->gfid, base_indices_holder_vgfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new (priv->root_inode->table);
+ ret = syncop_lookup (readdir_xl, dirloc, NULL, &iattr, NULL,
+ &parent);
+ if (ret < 0) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR, "lookup "
+ "failed for base_indices_holder dir"
+ " on %s - (%s)", readdir_xl->name,
+ strerror (errno));
+
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "base_indices"
+ "_holder is not yet created.");
+ }
+ goto out;
+ }
+ ret = _link_inode_update_loc (this, dirloc, &iattr);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ if (xattr)
+ dict_unref (xattr);
+ loc_wipe (&rootloc);
+ return ret;
+}
+
+int
+afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
+ loc_t *dirloc)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ if (crawl_data->crawl == FULL) {
+ fd = fd_create (dirloc->inode, crawl_data->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create fd for %s", dirloc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s", dirloc->path);
+ goto out;
+ }
+ } else {
+ fd = fd_anonymous (dirloc->inode);
+ }
+ ret = 0;
+out:
+ if (!ret)
+ *dirfd = fd;
+ return ret;
+}
+
+xlator_t*
+afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data)
+{
+ afr_private_t *priv = this->private;
+
+ if (crawl_data->crawl == FULL) {
+ return this;
+ } else {
+ return priv->children[crawl_data->child];
+ }
+ return NULL;
+}
+
+int
+afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
+ gf_dirent_t *entry, afr_crawl_data_t *crawl_data)
+{
+ int ret = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (crawl_data->crawl == FULL) {
+ ret = afr_build_child_loc (this, child, parent, entry->d_name);
+ } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ ret = _build_index_loc (this, child, entry->d_name, parent);
+ if (ret)
+ goto out;
+ child->inode = inode_new (priv->root_inode->table);
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+ child->path = NULL;
+ } else {
+ child->inode = inode_new (priv->root_inode->table);
+ if (!child->inode)
+ goto out;
+ uuid_parse (entry->d_name, child->gfid);
+ ret = _loc_assign_gfid_path (child);
+ }
+out:
+ return ret;
}
static int
-_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
- off_t *offset, pid_t pid)
+_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
+ off_t *offset, afr_crawl_data_t *crawl_data)
{
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
- struct iatt iatt = {0};
- struct iatt parent = {0};;
int ret = 0;
loc_t entry_loc = {0};
+ fd_t *fd = NULL;
+ struct iatt iattr = {0};
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
+ ret = -1;
+ goto out;
+ }
*offset = entry->d_off;
if (IS_ENTRY_CWD (entry->d_name) ||
IS_ENTRY_PARENT (entry->d_name))
continue;
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ if ((crawl_data->crawl == FULL) &&
+ uuid_is_null (entry->d_stat.ia_gfid)) {
gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
"gfid present skipping",
parentloc->path, entry->d_name);
@@ -115,88 +1349,93 @@ _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
}
loc_wipe (&entry_loc);
- ret = afr_build_child_loc (this, &entry_loc, parentloc,
- entry->d_name, entry->d_stat.ia_gfid);
+ ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
+ entry, crawl_data);
if (ret)
goto out;
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s", entry_loc.path);
+ ret = crawl_data->process_entry (this, crawl_data, entry,
+ &entry_loc, parentloc, &iattr);
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, NULL, &parent);
- //Don't fail the crawl if lookup fails as it
- //could be because of split-brain
- if (ret || (!IA_ISDIR (iatt.ia_type)))
+ if (crawl_data->crawl == INDEX_TO_BE_HEALED && ret) {
+ goto out;
+ } else if (ret) {
continue;
- afr_fill_loc_info (&entry_loc, &iatt, &parent);
- ret = _crawl_directory (&entry_loc, pid);
+ }
+
+ if ((crawl_data->crawl == INDEX) ||
+ (crawl_data->crawl == INDEX_TO_BE_HEALED))
+ continue;
+
+ if (!IA_ISDIR (iattr.ia_type))
+ continue;
+ fd = NULL;
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);
+ if (ret)
+ continue;
+ ret = _crawl_directory (fd, &entry_loc, crawl_data);
+ if (fd)
+ fd_unref (fd);
}
ret = 0;
out:
- if (entry_loc.path)
- loc_wipe (&entry_loc);
+ if ((crawl_data->crawl == INDEX_TO_BE_HEALED) && ret) {
+ gf_log (this->name, GF_LOG_ERROR,"Failed to get the hardlink "
+ "count");
+ }
+ loc_wipe (&entry_loc);
return ret;
}
static int
-_crawl_directory (loc_t *loc, pid_t pid)
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
{
xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
off_t offset = 0;
gf_dirent_t entries;
- struct iatt iatt = {0};
- struct iatt parent = {0};;
int ret = 0;
gf_boolean_t free_entries = _gf_false;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
INIT_LIST_HEAD (&entries.list);
this = THIS;
- priv = this->private;
GF_ASSERT (loc->inode);
- gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
- fd = fd_create (loc->inode, pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", loc->path);
- goto out;
- }
-
- if (!loc->parent) {
- ret = syncop_lookup (this, loc, NULL,
- &iatt, NULL, &parent);
- }
-
- ret = syncop_opendir (this, loc, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s", loc->path);
- goto out;
- }
+ if (crawl_data->crawl == FULL)
+ gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "crawling INDEX %s",
+ uuid_utoa (loc->gfid));
- while (syncop_readdirp (this, fd, 131072, offset, &entries)) {
+ while (1) {
+ if (crawl_data->crawl == FULL)
+ ret = syncop_readdirp (readdir_xl, fd, 131072, offset,
+ NULL, &entries);
+ else
+ ret = syncop_readdir (readdir_xl, fd, 131072, offset,
+ &entries);
+ if (ret <= 0)
+ break;
ret = 0;
free_entries = _gf_true;
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
- "< 2 children are up");
+
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
ret = -1;
goto out;
}
-
if (list_empty (&entries.list))
goto out;
- ret = _perform_self_heal (this, loc, &entries, &offset, pid);
+ ret = _process_entries (this, loc, &entries, &offset,
+ crawl_data);
+ if ((ret < 0) && (crawl_data->crawl == INDEX_TO_BE_HEALED)) {
+ goto out;
+ }
gf_dirent_free (&entries);
free_entries = _gf_false;
}
- if (fd)
- fd_unref (fd);
ret = 0;
out:
if (free_entries)
@@ -204,280 +1443,333 @@ out:
return ret;
}
+static char*
+position_str_get (afr_child_pos_t pos)
+{
+ switch (pos) {
+ case AFR_POS_UNKNOWN:
+ return "unknown";
+ case AFR_POS_LOCAL:
+ return "local";
+ case AFR_POS_REMOTE:
+ return "remote";
+ }
+ return NULL;
+}
+
int
-afr_find_child_position (xlator_t *this, int child)
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos)
{
afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
dict_t *xattr_rsp = NULL;
loc_t loc = {0};
int ret = 0;
- gf_boolean_t local = _gf_false;
- char *pathinfo = NULL;
- afr_child_pos_t *pos = NULL;
- inode_table_t *itable = NULL;
+ char *node_uuid = NULL;
priv = this->private;
- pos = &priv->shd.pos[child];
-
- if (*pos != AFR_POS_UNKNOWN) {
- goto out;
- }
+ shd = &priv->shd;
- //TODO: Hack to make the root_loc hack work
- LOCK (&priv->lock);
- {
- if (!priv->root_inode) {
- itable = inode_table_new (0, this);
- if (!itable)
- goto unlock;
- priv->root_inode = inode_new (itable);
- if (!priv->root_inode)
- goto unlock;
- }
- }
-unlock:
- UNLOCK (&priv->lock);
-
- if (!priv->root_inode) {
- ret = -1;
- goto out;
- }
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
- GF_XATTR_PATHINFO_KEY);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child "
- "%d", child);
+ GF_XATTR_NODE_UUID_KEY);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s - "
+ "(%s)", priv->children[child]->name, strerror (errno));
goto out;
}
- ret = dict_get_str (xattr_rsp, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ ret = dict_get_str (xattr_rsp, GF_XATTR_NODE_UUID_KEY, &node_uuid);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Pathinfo key not found on "
- "child %d", child);
+ gf_log (this->name, GF_LOG_ERROR, "node-uuid key not found on "
+ "child %s", priv->children[child]->name);
goto out;
}
- ret = afr_local_pathinfo (pathinfo, &local);
- if (ret)
- goto out;
- if (local)
+ if (!strcmp (node_uuid, shd->node_uuid))
*pos = AFR_POS_LOCAL;
else
*pos = AFR_POS_REMOTE;
- gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos);
+ gf_log (this->name, GF_LOG_DEBUG, "child %s is %s",
+ priv->children[child]->name, position_str_get (*pos));
out:
+ if (ret)
+ *pos = AFR_POS_UNKNOWN;
+ loc_wipe (&loc);
return ret;
}
-static int
-afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+int
+afr_syncop_find_child_position (void *data)
{
- GF_FREE (data);
- STACK_DESTROY (sync_frame->root);
- return 0;
+ shd_pos_t *pos_data = data;
+ int ret = 0;
+
+ ret = afr_find_child_position (pos_data->this, pos_data->child,
+ &pos_data->pos);
+ return ret;
}
static int
-afr_find_all_children_postions (xlator_t *this)
+afr_dir_crawl (void *data)
{
- int ret = -1;
- int i = 0;
- gf_boolean_t succeeded = _gf_false;
- afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ xlator_t *readdir_xl = NULL;
+ fd_t *fd = NULL;
+ loc_t dirloc = {0};
+ afr_crawl_data_t *crawl_data = data;
- priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] != 1)
- continue;
- ret = afr_find_child_position (this, i);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to determine if the "
- "child %s is local.",
- priv->children[i]->name);
- continue;
+ this = THIS;
+
+ if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags,
+ NULL))
+ goto out;
+
+ readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
+ if (!readdir_xl)
+ goto out;
+ crawl_data->readdir_xl = readdir_xl;
+
+ ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc);
+ if (ret)
+ goto out;
+
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc);
+ if (ret) {
+ if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open base_"
+ "indices_holder");
}
- succeeded = _gf_true;
+ goto out;
}
- if (succeeded)
- ret = 0;
+
+ ret = _crawl_directory (fd, &dirloc, crawl_data);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
+ readdir_xl->name);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "Crawl completed "
+ "on %s", readdir_xl->name);
+ if (crawl_data->crawl == INDEX)
+ dirloc.path = NULL;
+out:
+ if (fd)
+ fd_unref (fd);
+ if ((crawl_data->crawl == INDEX) ||
+ (crawl_data->crawl == INDEX_TO_BE_HEALED ))
+ dirloc.path = NULL;
+ loc_wipe (&dirloc);
return ret;
}
-static gf_boolean_t
-afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count)
+char *
+get_crawl_type_in_string (afr_crawl_type_t crawl)
{
- int i = 0;
- gf_boolean_t local = _gf_false;
-
- for (i = 0; i < child_count; i++, pos++) {
- if (*pos == AFR_POS_LOCAL) {
- local = _gf_true;
- break;
- }
+ char *index = "INDEX";
+ char *full = "FULL";
+ char *crawl_type = NULL;
+
+ if (crawl == INDEX){
+ crawl_type = index;
+ } else if (crawl == FULL) {
+ crawl_type = full;
}
- return local;
+
+ return crawl_type;
}
-int
-afr_init_child_position (xlator_t *this, int child)
+static int
+afr_allocate_crawl_event (xlator_t *this, int child, afr_crawl_type_t crawl)
{
- int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = 0;
+ shd_crawl_event_t *crawl_event = NULL;
+ time_t get_time = 0;
- if (child == AFR_ALL_CHILDREN) {
- ret = afr_find_all_children_postions (this);
- } else {
- ret = afr_find_child_position (this, child);
+ priv = this->private;
+ shd = &priv->shd;
+
+ crawl_event = GF_CALLOC (sizeof (shd_crawl_event_t), 1,
+ gf_afr_mt_shd_crawl_event_t);
+ if (!crawl_event) {
+ ret = -1;
+ goto out;
+ }
+
+ get_time = time(NULL);
+ if (get_time == ((time_t)-1)) {
+ ret = -1;
+ goto out;
+ }
+
+ crawl_event->start_time_str = gf_strdup (ctime(&get_time));
+
+ crawl_event->crawl_type = get_crawl_type_in_string (crawl);
+ if (!crawl_event->crawl_type) {
+ ret = -1;
+ goto out;
+ }
+ LOCK (&priv->lock);
+ {
+ shd->crawl_events[child] = crawl_event;
}
+ UNLOCK (&priv->lock);
+ ret = 0;
+out:
return ret;
+
}
-int
-afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
+static int
+afr_put_crawl_event_in_eh (xlator_t *this, int child)
{
- gf_boolean_t local = _gf_false;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = 0;
+ time_t get_time = 0;
+ shd_crawl_event_t **crawl_event = NULL;
- if (child == AFR_ALL_CHILDREN)
- local = afr_local_child_exists (shd->pos, child_count);
- else
- local = (shd->pos[child] == AFR_POS_LOCAL);
+ priv = this->private;
+ shd = &priv->shd;
- return local;
+ get_time = time(NULL);
+ if (get_time == ((time_t)-1)) {
+ ret = -1;
+ goto out;
+ }
+ crawl_event = (shd_crawl_event_t**)shd->crawl_events;
+ LOCK (&priv->lock);
+ {
+ crawl_event[child]->end_time_str = gf_strdup (ctime(&get_time));
+ ret = eh_save_history (shd->statistics[child],
+ crawl_event[child]);
+ crawl_event[child] = NULL;
+ }
+ UNLOCK (&priv->lock);
+out:
+ return ret;
}
static int
-afr_crawl_directory (xlator_t *this, pid_t pid)
+afr_dir_exclusive_crawl (void *data)
{
afr_private_t *priv = NULL;
afr_self_heald_t *shd = NULL;
- loc_t loc = {0};
gf_boolean_t crawl = _gf_false;
- int ret = 0;
+ int ret = 0;
+ int child = -1;
+ xlator_t *this = NULL;
+ afr_crawl_data_t *crawl_data = data;
+ this = THIS;
priv = this->private;
shd = &priv->shd;
-
+ child = crawl_data->child;
LOCK (&priv->lock);
{
- if (shd->inprogress) {
- shd->pending = _gf_true;
+ if (shd->inprogress[child]) {
+ if (shd->pending[child] != FULL)
+ shd->pending[child] = crawl_data->crawl;
} else {
- shd->inprogress = _gf_true;
+ shd->inprogress[child] = _gf_true;
crawl = _gf_true;
}
}
UNLOCK (&priv->lock);
- if (!priv->root_inode) {
- ret = -1;
+ if (!crawl) {
+ gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
+ "for %s", priv->children[child]->name);
goto out;
}
- if (!crawl)
- goto out;
-
- afr_build_root_loc (priv->root_inode, &loc);
- while (crawl) {
- ret = _crawl_directory (&loc, pid);
+ do {
+ ret = afr_allocate_crawl_event (this, child, crawl_data->crawl);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Crawl failed");
- else
- gf_log (this->name, GF_LOG_INFO, "Crawl completed");
+ goto out;
+ afr_dir_crawl (data);
+
+ ret = afr_put_crawl_event_in_eh (this, child);
+ if (ret < 0)
+ goto out;
+
LOCK (&priv->lock);
{
- if (shd->pending) {
- shd->pending = _gf_false;
+ if (shd->pending[child] != NONE) {
+ crawl_data->crawl = shd->pending[child];
+ shd->pending[child] = NONE;
} else {
- shd->inprogress = _gf_false;
+ shd->inprogress[child] = _gf_false;
crawl = _gf_false;
}
}
UNLOCK (&priv->lock);
- }
-out:
- return ret;
-}
-
-static int
-afr_crawl (void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int ret = -1;
- afr_crawl_data_t *crawl_data = data;
-
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
-
- ret = afr_init_child_position (this, crawl_data->child);
- if (ret)
- goto out;
-
- if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
- goto out;
-
- ret = afr_crawl_directory (this, crawl_data->pid);
+ } while (crawl);
out:
return ret;
}
void
-afr_proactive_self_heal (xlator_t *this, int idx)
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done)
{
afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
call_frame_t *frame = NULL;
afr_crawl_data_t *crawl_data = NULL;
int ret = 0;
+ int (*crawler) (void*) = NULL;
priv = this->private;
- shd = &priv->shd;
- if (!shd->enabled)
- goto out;
-
- if ((idx != AFR_ALL_CHILDREN) &&
- (shd->pos[idx] == AFR_POS_REMOTE))
- goto out;
frame = create_frame (this, this->ctx->pool);
if (!frame)
goto out;
- afr_set_lk_owner (frame, this);
+ afr_set_lk_owner (frame, this, frame->root);
afr_set_low_priority (frame);
crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
- gf_afr_mt_afr_crawl_data_t);
+ gf_afr_mt_crawl_data_t);
if (!crawl_data)
goto out;
+ crawl_data->process_entry = process_entry;
crawl_data->child = idx;
crawl_data->pid = frame->root->pid;
- gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx);
- ret = synctask_new (this->ctx->env, afr_crawl,
- afr_crawl_done, frame, crawl_data);
+ crawl_data->crawl = crawl;
+ crawl_data->op_data = op_data;
+ crawl_data->crawl_flags = crawl_flags;
+ gf_log (this->name, GF_LOG_DEBUG, "starting crawl %d for %s",
+ crawl_data->crawl, priv->children[idx]->name);
+
+ if (exclusive)
+ crawler = afr_dir_exclusive_crawl;
+ else
+ crawler = afr_dir_crawl;
+ ret = synctask_new (this->ctx->env, crawler,
+ crawl_done, frame, crawl_data);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Could not create the "
- "task for %d ret %d", idx, ret);
+ gf_log (this->name, GF_LOG_ERROR, "afr crawl failed for child"
+ " %d with ret %d", idx, ret);
out:
return;
}
-//TODO: This is a hack
void
-afr_build_root_loc (inode_t *inode, loc_t *loc)
+afr_build_root_loc (xlator_t *this, loc_t *loc)
{
- loc->path = "/";
- loc->name = "";
- loc->inode = inode;
- loc->inode->ia_type = IA_IFDIR;
- memset (loc->inode->gfid, 0, 16);
- loc->inode->gfid[15] = 1;
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ loc->path = gf_strdup ("/");
+ loc->name = "";
+ loc->inode = inode_ref (priv->root_inode);
+ uuid_copy (loc->gfid, loc->inode->gfid);
}
int
@@ -493,4 +1785,3 @@ afr_set_root_gfid (dict_t *dict)
return ret;
}
-
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 6eb119b07..e0c083754 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEALD_H__
@@ -27,17 +18,48 @@
#define AFR_ALL_CHILDREN -1
typedef struct afr_crawl_data_ {
- int child;
- pid_t pid;
+ int child;
+ pid_t pid;
+ afr_crawl_type_t crawl;
+ xlator_t *readdir_xl;
+ void *op_data;
+ int crawl_flags;
+ int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
} afr_crawl_data_t;
-void afr_proactive_self_heal (xlator_t *this, int idx);
+typedef struct crawl_event_stats_ {
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
+ char *start_time_str;
+ char *end_time_str;
+ char *crawl_type;
+ gf_boolean_t crawl_inprogress;
+} shd_crawl_event_t;
+
+void _destroy_crawl_event_data (void *data);
+void _destroy_shd_event_data (void *data);
+
+typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
-void afr_build_root_loc (inode_t *inode, loc_t *loc);
+void afr_build_root_loc (xlator_t *this, loc_t *loc);
int afr_set_root_gfid (dict_t *dict);
-inline void
-afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent);
+void
+afr_proactive_self_heal (void *data);
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+
+/*
+ * In addition to its self-heal use, this is used to find a local default
+ * read_child.
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local);
#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 6ae493f1c..20306e469 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1,25 +1,17 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
#include "byte-order.h"
#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
@@ -32,48 +24,75 @@
of RENAME */
#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
-
afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
{
uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- ret = fd_ctx_get (fd, this, &ctx);
+ priv = this->private;
- if (ret < 0)
- goto out;
+ ret = __fd_ctx_get (fd, this, &ctx);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ if (ret < 0 && fd_is_anonymous (fd)) {
+ ret = __afr_fd_ctx_set (this, fd);
+ if (ret < 0)
+ goto out;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ for (i = 0; i < priv->child_count; i++)
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
out:
return fd_ctx;
}
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get (fd, this);
+ }
+ UNLOCK(&fd->lock);
+
+ return fd_ctx;
+}
+
+
static void
-afr_pid_save (call_frame_t *frame)
+afr_save_lk_owner (call_frame_t *frame)
{
afr_local_t * local = NULL;
local = frame->local;
- local->saved_pid = frame->root->pid;
+ local->saved_lk_owner = frame->root->lk_owner;
}
static void
-afr_pid_restore (call_frame_t *frame)
+afr_restore_lk_owner (call_frame_t *frame)
{
afr_local_t * local = NULL;
local = frame->local;
- frame->root->pid = local->saved_pid;
+ frame->root->lk_owner = local->saved_lk_owner;
}
-
static void
__mark_all_pending (int32_t *pending[], int child_count,
afr_transaction_type type)
@@ -126,51 +145,23 @@ out:
return;
}
-
-static void
-__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
-{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- local = frame->local;
-
- if (!local->fd)
- return;
-
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- if (!fd_ctx)
- goto out;
-
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]--;
- }
- UNLOCK (&local->fd->lock);
-out:
- return;
-}
-
-
static void
-__mark_down_children (int32_t *pending[], int child_count,
- unsigned char *child_up, afr_transaction_type type)
+__mark_non_participant_children (int32_t *pending[], int child_count,
+ unsigned char *participants,
+ afr_transaction_type type)
{
int i = 0;
int j = 0;
+ j = afr_index_for_transaction_type (type);
for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
-
- if (!child_up[i])
+ if (!participants[i])
pending[i][j] = 0;
}
}
-static void
+void
__mark_all_success (int32_t *pending[], int child_count,
afr_transaction_type type)
{
@@ -183,6 +174,54 @@ __mark_all_success (int32_t *pending[], int child_count,
}
}
+void
+_set_all_child_errno (int *child_errno, unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++)
+ if (child_errno[i] == 0)
+ child_errno[i] = ENOTCONN;
+}
+
+void
+afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
+
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
+
+ _set_all_child_errno (local->child_errno, priv->child_count);
+
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush cant clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner (frame);
+ frame->root->lk_owner =
+ local->transaction.main_frame->root->lk_owner;
+
+
+ /* The wake up needs to happen independent of
+ what type of fop arrives here. If it was
+ a write, then it has already inherited the
+ lock and changelog. If it was not a write,
+ then the presumption of the optimization (of
+ optimizing for successive write operations)
+ fails.
+ */
+ if (fd)
+ afr_delayed_changelog_wake_up (this, fd);
+ local->transaction.fop (frame, this);
+}
+
static int
__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
@@ -246,64 +285,42 @@ __fop_changelog_needed (call_frame_t *frame, xlator_t *this)
return op_ret;
}
-
-static int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending)
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
+ int child, afr_xattrop_type_t op)
{
int i = 0;
int ret = 0;
+ if (op == LOCAL_FIRST) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[child],
+ pending[child],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret)
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
+ if (i == child)
+ continue;
ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i], 3 * sizeof (int32_t));
+ pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
/* 3 = data+metadata+entry */
if (ret < 0)
goto out;
}
-
-out:
- return ret;
-}
-
-
-static int
-afr_set_piggyback_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
- afr_transaction_type type)
-{
- int i = 0;
- int ret = 0;
- int *arr = NULL;
- int index = 0;
- size_t pending_xattr_size = 3 * sizeof (int32_t);
- /* 3 = data+metadata+entry */
-
- index = afr_index_for_transaction_type (type);
-
- for (i = 0; i < priv->child_count; i++) {
- arr = GF_CALLOC (1, pending_xattr_size,
- gf_afr_mt_char);
- if (!arr) {
- ret = -1;
- goto out;
- }
-
- memcpy (arr, pending[i], pending_xattr_size);
-
- arr[index] = hton32 (ntoh32(arr[index]) + 1);
-
- ret = dict_set_bin (xattr, priv->pending_key[i],
- arr, pending_xattr_size);
-
- if (ret < 0)
+ if (op == LOCAL_LAST) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[child],
+ pending[child],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret)
goto out;
}
-
out:
return ret;
}
-
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
{
@@ -331,7 +348,8 @@ afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
int32_t
afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
@@ -349,6 +367,11 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&frame->lock);
if (call_count == 0) {
+ if (local->transaction.resume_stub) {
+ call_resume (local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
+
if (afr_lock_server_count (priv, local->transaction.type) == 0) {
local->transaction.done (frame, this);
} else {
@@ -415,24 +438,41 @@ afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
afr_inode_rm_stale_children (this, inode, stale_children);
out:
- if (stale_children)
- GF_FREE (stale_children);
+ GF_FREE (stale_children);
return;
}
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
+{
+ afr_inodelk_t *inodelk = NULL;
+ int i = 0;
+
+ for (i = 0; int_lock->inodelk[i].domain; i++) {
+ inodelk = &int_lock->inodelk[i];
+ if (strcmp (dom, inodelk->domain) == 0)
+ return inodelk;
+ }
+ return NULL;
+}
+
unsigned char*
afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
{
unsigned char *locked_nodes = NULL;
+ afr_inodelk_t *inodelk = NULL;
switch (type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- locked_nodes = int_lock->inode_locked_nodes;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ locked_nodes = inodelk->locked_nodes;
break;
case AFR_ENTRY_TRANSACTION:
case AFR_ENTRY_RENAME_TRANSACTION:
- locked_nodes = int_lock->entry_locked_nodes;
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ locked_nodes = int_lock->lockee[0].locked_nodes;
break;
}
return locked_nodes;
@@ -470,12 +510,141 @@ afr_changelog_post_op_call_count (afr_transaction_type type,
return call_count;
}
+void
+afr_compute_txn_changelog (afr_local_t *local, afr_private_t *priv)
+{
+ int i = 0;
+ int index = 0;
+ int32_t postop = 0;
+ int32_t preop = 1;
+ int32_t **txn_changelog = NULL;
+
+ txn_changelog = local->transaction.txn_changelog;
+ index = afr_index_for_transaction_type (local->transaction.type);
+ for (i = 0; i < priv->child_count; i++) {
+ postop = ntoh32 (local->pending[i][index]);
+ txn_changelog[i][index] = hton32 (postop + preop);
+ }
+}
+
+afr_xattrop_type_t
+afr_get_postop_xattrop_type (int32_t **pending, int optimized, int child,
+ afr_transaction_type type)
+{
+ int index = 0;
+ afr_xattrop_type_t op = LOCAL_LAST;
+
+ index = afr_index_for_transaction_type (type);
+ if (optimized && !pending[child][index])
+ op = LOCAL_FIRST;
+ return op;
+}
+
+void
+afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,
+ int optimized, int child)
+{
+ int32_t **txn_changelog = NULL;
+ int32_t **changelog = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ afr_xattrop_type_t op = LOCAL_LAST;
+
+ priv = this->private;
+ txn_changelog = local->transaction.txn_changelog;
+ op = afr_get_postop_xattrop_type (local->pending, optimized, child,
+ local->transaction.type);
+ if (optimized)
+ changelog = txn_changelog;
+ else
+ changelog = local->pending;
+ ret = afr_set_pending_dict (priv, xattr, changelog, child, op);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set pending entry");
+}
+
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int index = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ index = afr_index_for_transaction_type (local->transaction.type);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->pending[i][index] == 0)
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+static void
+afr_dir_fop_handle_all_fop_failures (call_frame_t *frame)
+{
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ this = frame->this;
+ local = frame->local;
+ priv = this->private;
+
+ if ((local->transaction.type != AFR_ENTRY_TRANSACTION) &&
+ (local->transaction.type != AFR_ENTRY_RENAME_TRANSACTION))
+ return;
+
+ if (local->op_ret >= 0)
+ goto out;
+
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
+out:
+ return;
+}
+
+static void
+afr_data_handle_quota_errors (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ gf_boolean_t all_quota_failures = _gf_false;
+
+ local = frame->local;
+ priv = this->private;
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
+ /*
+ * Idea is to not leave the file in FOOL-FOOL scenario in case on
+ * all the bricks data transaction failed with EDQUOT to avoid
+ * increasing un-necessary load of self-heals in the system.
+ */
+ all_quota_failures = _gf_true;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ (local->child_errno[i] != EDQUOT)) {
+ all_quota_failures = _gf_false;
+ break;
+ }
+ }
+ if (all_quota_failures)
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
+}
+
int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
{
afr_private_t * priv = this->private;
afr_internal_lock_t *int_lock = NULL;
- int ret = 0;
int i = 0;
int call_count = 0;
@@ -483,14 +652,17 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
afr_fd_ctx_t *fdctx = NULL;
dict_t **xattr = NULL;
int piggyback = 0;
- int index = 0;
int nothing_failed = 1;
local = frame->local;
int_lock = &local->internal_lock;
- __mark_down_children (local->pending, priv->child_count,
- local->child_up, local->transaction.type);
+ __mark_non_participant_children (local->pending, priv->child_count,
+ local->transaction.pre_op,
+ local->transaction.type);
+
+ afr_data_handle_quota_errors (frame, this);
+ afr_dir_fop_handle_all_fop_failures (frame);
if (local->fd)
afr_transaction_rm_stale_children (frame, this,
@@ -518,82 +690,65 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
goto out;
}
- /* check if something has failed, to handle piggybacking */
- nothing_failed = 1;
- index = afr_index_for_transaction_type (local->transaction.type);
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending[i][index] == 0) {
- nothing_failed = 0;
- break;
- }
- }
+ nothing_failed = afr_txn_nothing_failed (frame, this);
- index = afr_index_for_transaction_type (local->transaction.type);
- if (local->optimistic_change_log &&
- local->transaction.type != AFR_DATA_TRANSACTION) {
- /* if nothing_failed, then local->pending[..] == {0 .. 0} */
- for (i = 0; i < priv->child_count; i++)
- local->pending[i][index]++;
- }
+ afr_compute_txn_changelog (local , priv);
for (i = 0; i < priv->child_count; i++) {
if (!local->transaction.pre_op[i])
continue;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ afr_set_postop_dict (local, this, xattr[i],
+ local->optimistic_change_log, i);
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
{
if (!fdctx) {
+ afr_set_postop_dict (local, this, xattr[i],
+ 0, i);
STACK_WIND (frame, afr_changelog_post_op_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
&local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
break;
}
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_piggyback[i]) {
- fdctx->pre_op_piggyback[i]--;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
+ /* local->transaction.postop_piggybacked[] was
+ precomputed in is_piggyback_postop() when called from
+ afr_changelog_post_op_safe()
+ */
+
+ piggyback = 0;
+ if (local->transaction.postop_piggybacked[i])
+ piggyback = 1;
- if (piggyback && !nothing_failed)
- ret = afr_set_piggyback_dict (priv, xattr[i],
- local->pending,
- local->transaction.type);
+ afr_set_postop_dict (local, this, xattr[i],
+ piggyback, i);
if (nothing_failed && piggyback) {
afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i], NULL);
} else {
- __mark_pre_op_undone_on_fd (frame, this, i);
STACK_WIND_COOKIE (frame,
afr_changelog_post_op_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
}
break;
case AFR_METADATA_TRANSACTION:
{
- if (nothing_failed) {
+ if (nothing_failed && local->optimistic_change_log) {
afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
break;
}
@@ -602,28 +757,32 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
else
STACK_WIND (frame, afr_changelog_post_op_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
&local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
case AFR_ENTRY_RENAME_TRANSACTION:
{
- if (nothing_failed) {
+ if (nothing_failed && local->optimistic_change_log) {
afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
} else {
STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->xattrop,
&local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
call_count--;
}
@@ -636,20 +795,17 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
value
*/
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
+ afr_set_postop_dict (local, this, xattr[i],
+ local->optimistic_change_log, i);
/* fall through */
case AFR_ENTRY_TRANSACTION:
{
- if (nothing_failed) {
+ if (nothing_failed && local->optimistic_change_log) {
afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
break;
}
@@ -658,13 +814,15 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
else
STACK_WIND (frame, afr_changelog_post_op_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
&local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
}
@@ -684,7 +842,8 @@ out:
int32_t
afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = this->private;
@@ -729,12 +888,7 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(local->op_errno == ENOTSUP)) {
local->transaction.resume (frame, this);
} else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
-
- afr_pid_restore (frame);
-
- local->transaction.fop (frame, this);
+ afr_transaction_perform_fop (frame, this);
}
}
@@ -787,8 +941,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (!locked_nodes[i])
continue;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
+ ret = afr_set_pending_dict (priv, xattr[i], local->pending,
+ i, LOCAL_FIRST);
if (ret < 0)
gf_log (this->name, GF_LOG_INFO,
@@ -805,7 +959,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->xattrop,
&(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
break;
}
@@ -822,9 +977,12 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
}
UNLOCK (&local->fd->lock);
+ afr_set_delayed_post_op (frame, this);
+
if (piggyback)
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
else
STACK_WIND_COOKIE (frame,
afr_changelog_pre_op_cbk,
@@ -832,14 +990,16 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
case AFR_METADATA_TRANSACTION:
{
if (local->optimistic_change_log) {
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
break;
}
@@ -850,7 +1010,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
else
STACK_WIND_COOKIE (frame,
afr_changelog_pre_op_cbk,
@@ -858,7 +1019,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->xattrop,
&(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
@@ -866,7 +1028,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
if (local->optimistic_change_log) {
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
} else {
STACK_WIND_COOKIE (frame,
afr_changelog_pre_op_cbk,
@@ -874,7 +1037,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->xattrop,
&local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
call_count--;
@@ -889,8 +1053,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
value
*/
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
+ ret = afr_set_pending_dict (priv, xattr[i], local->pending,
+ i, LOCAL_FIRST);
if (ret < 0)
gf_log (this->name, GF_LOG_INFO,
@@ -902,7 +1066,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
if (local->optimistic_change_log) {
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
break;
}
@@ -913,7 +1078,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
else
STACK_WIND_COOKIE (frame,
afr_changelog_pre_op_cbk,
@@ -921,7 +1087,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->xattrop,
&local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
}
@@ -1083,12 +1250,14 @@ int
afr_set_transaction_flock (afr_local_t *local)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- int_lock->lk_flock.l_len = local->transaction.len;
- int_lock->lk_flock.l_start = local->transaction.start;
- int_lock->lk_flock.l_type = F_WRLCK;
+ inodelk->flock.l_len = local->transaction.len;
+ inodelk->flock.l_start = local->transaction.start;
+ inodelk->flock.l_type = F_WRLCK;
return 0;
}
@@ -1103,6 +1272,7 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->domain = this->name;
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
@@ -1116,8 +1286,8 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
case AFR_ENTRY_RENAME_TRANSACTION:
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
break;
case AFR_ENTRY_TRANSACTION:
@@ -1139,12 +1309,6 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int
afr_lock (call_frame_t *frame, xlator_t *this)
{
- afr_pid_save (frame);
-
- frame->root->pid = (long) frame->root;
-
- afr_set_lk_owner (frame, this);
-
afr_set_lock_number (frame, this);
return afr_lock_rec (frame, this);
@@ -1156,28 +1320,463 @@ afr_lock (call_frame_t *frame, xlator_t *this)
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ if (__fop_changelog_needed (frame, this)) {
+ afr_changelog_pre_op (frame, this);
+ } else {
+ afr_transaction_perform_fop (frame, this);
+ }
+
+ return 0;
+}
+
+
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ /* call this function from any of the related optimizations
+ which benefit from delaying post op are enabled, namely:
+
+ - changelog piggybacking
+ - eager locking
+ */
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ if (!priv->post_op_delay_secs)
+ return;
+
+ local = frame->local;
+ if (!local->transaction.eager_lock_on)
+ return;
+
+ if (!local)
+ return;
+
+ if (!local->fd)
+ return;
+
+ if (local->op == GF_FOP_WRITE)
+ local->delayed_post_op = _gf_true;
+}
+
+gf_boolean_t
+afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ictx = NULL;
+
+ if (!inode) {
+ /* If false is returned, it may keep on taking eager-lock
+ * which may lead to starvation, so return true to avoid that.
+ */
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid inode");
+ return _gf_true;
+ }
+ /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
+ * is taken mount2 opened the same file, it won't be able to
+ * perform any data operations until mount1 releases eager-lock.
+ * To avoid such scenario do not enable eager-lock for this transaction
+ * if open-fd-count is > 1
+ */
+
+ ictx = afr_inode_ctx_get (inode, this);
+ if (!ictx)
+ return _gf_true;
+
+ if (ictx->open_fd_count > 1)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+gf_boolean_t
+afr_any_fops_failed (afr_local_t *local, afr_private_t *priv)
+{
+ if (local->success_count != priv->child_count)
+ return _gf_true;
+ return _gf_false;
+}
+
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
+ afr_private_t *priv = NULL;
priv = this->private;
+
local = frame->local;
+ if (!local)
+ goto out;
- if (__fop_changelog_needed (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ if (!local->delayed_post_op)
+ goto out;
+
+ //Mark pending changelog ASAP
+ if (afr_any_fops_failed (local, priv))
+ goto out;
+
+ if (local->fd && afr_are_multiple_fds_opened (local->fd->inode, this))
+ goto out;
+
+ res = _gf_true;
+out:
+ return res;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub);
+
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
+{
+ fd_t *fd = NULL;
+
+ fd = data;
+
+ afr_delayed_changelog_wake_up (THIS, fd);
+}
+
+
+/*
+ Check if the frame is destined to get optimized away
+ with changelog piggybacking
+*/
+static gf_boolean_t
+is_piggyback_post_op (call_frame_t *frame, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *local = NULL;
+ gf_boolean_t piggyback = _gf_true;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = frame->this->private;
+ local = frame->local;
+ fdctx = afr_fd_ctx_get (fd, frame->this);
+
+ LOCK(&fd->lock);
+ {
+ piggyback = _gf_true;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+ if (fdctx->pre_op_piggyback[i]) {
+ fdctx->pre_op_piggyback[i]--;
+ local->transaction.postop_piggybacked[i] = 1;
+ } else {
+ /* For at least _one_ subvolume we cannot
+ piggyback on the changelog, and have to
+ perform a hard POST-OP and therefore fsync
+ if necesssary
+ */
+ piggyback = _gf_false;
+ GF_ASSERT (fdctx->pre_op_done[i]);
+ fdctx->pre_op_done[i]--;
+ }
+ }
+ }
+ UNLOCK(&fd->lock);
+
+ if (!afr_txn_nothing_failed (frame, frame->this)) {
+ /* something failed in this transaction,
+ we will be performing a hard post-op
+ */
+ return _gf_false;
+ }
+
+ return piggyback;
+}
+
+
+/* SET operation */
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+
+ fdctx = afr_fd_ctx_get (fd, this);
+
+ LOCK(&fd->lock);
+ {
+ fdctx->witnessed_unstable_write = _gf_true;
+ }
+ UNLOCK(&fd->lock);
+
+ return 0;
+}
+
+/* TEST and CLEAR operation */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t witness = _gf_false;
- afr_pid_restore (frame);
+ fdctx = afr_fd_ctx_get (fd, this);
+ if (!fdctx)
+ return _gf_true;
- local->transaction.fop (frame, this);
+ LOCK(&fd->lock);
+ {
+ if (fdctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ fdctx->witnessed_unstable_write = _gf_false;
+ }
}
+ UNLOCK (&fd->lock);
+
+ return witness;
+}
+
+
+int
+afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno)) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa (local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ gf_fop_list[local->op]);
+
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ afr_changelog_post_op_now (frame, this);
return 0;
}
int
+afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ xdata = dict_new();
+ if (xdata)
+ ret = dict_set_int32 (xdata, "batch-fsync", 1);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync, local->fd,
+ 1, xdata);
+ if (!--call_count)
+ break;
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+ int
+afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ if (is_piggyback_post_op (frame, local->fd)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
+ */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation that what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need take matter into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ if (!afr_fd_has_witnessed_unstable_write (this, local->fd)) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* Check whether users want durability and perform fsync/post-op
+ * accordingly.
+ */
+ if (priv->ensure_durability) {
+ /* Time to fsync() */
+ afr_changelog_fsync (frame, this);
+ } else {
+ afr_changelog_post_op_now (frame, this);
+ }
+
+ return 0;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *prev_frame = NULL;
+ struct timespec delta = {0, };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ pthread_mutex_lock (&fd_ctx->delay_lock);
+ {
+ prev_frame = fd_ctx->delay_frame;
+ fd_ctx->delay_frame = NULL;
+ if (fd_ctx->delay_timer)
+ gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
+ fd_ctx->delay_timer = NULL;
+ if (!frame)
+ goto unlock;
+ fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
+ afr_delayed_changelog_wake_up_cbk,
+ fd);
+ fd_ctx->delay_frame = frame;
+ }
+unlock:
+ pthread_mutex_unlock (&fd_ctx->delay_lock);
+
+out:
+ if (prev_frame) {
+ local = prev_frame->local;
+ local->transaction.resume_stub = stub;
+ afr_changelog_post_op_safe (prev_frame, this);
+ } else if (stub) {
+ call_resume (stub);
+ }
+}
+
+
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (is_afr_delayed_changelog_post_op_needed (frame, this))
+ afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
+ else
+ afr_changelog_post_op_safe (frame, this);
+}
+
+
+
+/* Wake up the sleeping/delayed post-op, and also register
+ a stub to have it resumed after this transaction
+ completely finishes.
+
+ The @stub gets saved in @local and gets resumed in
+ afr_local_cleanup()
+ */
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, stub);
+}
+
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, NULL);
+}
+
+
+ int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
@@ -1188,6 +1787,19 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
+ if (local->transaction.eager_lock_on) {
+ /* We don't need to retain "local" in the
+ fd list anymore, writes to all subvols
+ are finished by now */
+ LOCK (&local->fd->lock);
+ {
+ list_del_init (&local->transaction.eager_locked);
+ }
+ UNLOCK (&local->fd->lock);
+ }
+
+ afr_restore_lk_owner (frame);
+
if (__fop_changelog_needed (frame, this)) {
afr_changelog_post_op (frame, this);
} else {
@@ -1208,7 +1820,8 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
+ int child_index)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -1217,7 +1830,89 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index
priv = this->private;
__mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
+ child_index, local->transaction.type);
+}
+
+
+
+ static gf_boolean_t
+afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
+{
+ uint64_t start1 = local1->transaction.start;
+ uint64_t start2 = local2->transaction.start;
+ uint64_t end1 = 0;
+ uint64_t end2 = 0;
+
+ if (local1->transaction.len)
+ end1 = start1 + local1->transaction.len - 1;
+ else
+ end1 = ULLONG_MAX;
+
+ if (local2->transaction.len)
+ end2 = start2 + local2->transaction.len - 1;
+ else
+ end2 = ULLONG_MAX;
+
+ return ((end1 >= start2) && (end2 >= start1));
+}
+
+void
+afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *each = NULL;
+
+ priv = this->private;
+
+ if (!local->fd)
+ return;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
+
+ if (!priv->eager_lock)
+ return;
+
+ fdctx = afr_fd_ctx_get (local->fd, this);
+ if (!fdctx)
+ return;
+
+ if (afr_are_multiple_fds_opened (local->fd->inode, this))
+ return;
+ /*
+ * Once full file lock is acquired in eager-lock phase, overlapping
+ * writes do not compete for inode-locks, instead are transferred to the
+ * next writes. Because of this overlapping writes are not ordered.
+ * This can cause inconsistencies in replication.
+ * Example:
+ * Two overlapping writes w1, w2 are sent in parallel on same fd
+ * in two threads t1, t2.
+ * Both threads can execute afr_writev_wind in the following manner.
+ * t1 winds w1 on brick-0
+ * t2 winds w2 on brick-0
+ * t2 winds w2 on brick-1
+ * t1 winds w1 on brick-1
+ *
+ * This check makes sure the locks are not transferred for
+ * overlapping writes.
+ */
+ LOCK (&local->fd->lock);
+ {
+ list_for_each_entry (each, &fdctx->eager_locked,
+ transaction.eager_locked) {
+ if (afr_locals_overlap (each, local)) {
+ local->transaction.eager_lock_on = _gf_false;
+ goto unlock;
+ }
+ }
+
+ local->transaction.eager_lock_on = _gf_true;
+ list_add_tail (&local->transaction.eager_locked,
+ &fdctx->eager_locked);
+ }
+unlock:
+ UNLOCK (&local->fd->lock);
}
@@ -1226,20 +1921,43 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
+ fd_t *fd = NULL;
+ int ret = -1;
local = frame->local;
priv = this->private;
- afr_transaction_local_init (local, this);
-
local->transaction.resume = afr_transaction_resume;
local->transaction.type = type;
+ ret = afr_transaction_local_init (local, this);
+ if (ret < 0)
+ goto out;
+
+ afr_transaction_eager_lock_init (local, this);
+
+ if (local->fd && local->transaction.eager_lock_on)
+ afr_set_lk_owner (frame, this, local->fd);
+ else
+ afr_set_lk_owner (frame, this, frame->root);
+
+ if (!local->transaction.eager_lock_on && local->loc.inode) {
+ fd = fd_lookup (local->loc.inode, frame->root->pid);
+ if (fd == NULL)
+ fd = fd_lookup_anonymous (local->loc.inode);
+
+ if (fd) {
+ afr_delayed_changelog_wake_up (this, fd);
+ fd_unref (fd);
+ }
+ }
+
if (afr_lock_server_count (priv, local->transaction.type) == 0) {
afr_internal_lock_finish (frame, this);
} else {
afr_lock (frame, this);
}
-
- return 0;
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 10f274fec..fa626fd0d 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -1,25 +1,21 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
+typedef enum {
+ LOCAL_FIRST = 1,
+ LOCAL_LAST = 2
+} afr_xattrop_type_t;
+
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
@@ -27,9 +23,29 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
afr_fd_ctx_t *
afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
+ int child, afr_xattrop_type_t op);
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success (int32_t *pending[], int child_count,
+ afr_transaction_type type);
+gf_boolean_t
+afr_any_fops_failed (afr_local_t *local, afr_private_t *priv);
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index f3c3e7e7a..c724eb2ae 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -30,6 +21,11 @@
#endif
#include "afr-common.c"
+#define SHD_INODE_LRU_LIMIT 2048
+#define AFR_EH_HEALED_LIMIT 1024
+#define AFR_EH_HEAL_FAIL_LIMIT 1024
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+
struct volume_options options[];
int32_t
@@ -37,8 +33,13 @@ notify (xlator_t *this, int32_t event,
void *data, ...)
{
int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- ret = afr_notify (this, event, data);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
return ret;
}
@@ -106,6 +107,7 @@ reconfigure (xlator_t *this, dict_t *options)
{
afr_private_t *priv = NULL;
xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
int ret = -1;
int index = -1;
char *qtype = NULL;
@@ -148,6 +150,9 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
+ options, uint32, out);
+
if (read_subvol) {
index = xlator_subvolume_index (this, read_subvol);
if (index == -1) {
@@ -158,10 +163,37 @@ reconfigure (xlator_t *this, dict_t *options)
priv->read_child = index;
}
+ GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
+
+ if (read_subvol_index >-1) {
+ index=read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ index);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
uint32, out);
fix_quorum_options(this,priv,qtype);
+ GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
+ int32, out);
+
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
+ options, size, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF ("readdir-failover", priv->readdir_failover, options,
+ bool, out);
+ GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+ priv->did_discovery = _gf_false;
ret = 0;
out:
@@ -189,6 +221,7 @@ init (xlator_t *this)
int ret = -1;
GF_UNUSED int op_errno = 0;
xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
xlator_t *fav_child = NULL;
char *qtype = NULL;
@@ -204,7 +237,10 @@ init (xlator_t *this)
"Volume is dangling.");
}
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
priv = this->private;
LOCK_INIT (&priv->lock);
@@ -217,7 +253,6 @@ init (xlator_t *this)
priv->child_count = child_count;
-
priv->read_child = -1;
GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
@@ -229,6 +264,18 @@ init (xlator_t *this)
goto out;
}
}
+ GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ read_subvol_index);
+ goto out;
+ }
+ priv->read_child = read_subvol_index;
+ }
+ GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
+
+ GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
priv->favorite_child = -1;
GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
@@ -263,6 +310,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
+ GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+
GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
@@ -279,10 +328,18 @@ init (xlator_t *this)
GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
+ GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
GF_OPTION_INIT ("quorum-type", qtype, str, out);
GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size,
+ out);
fix_quorum_options(this,priv,qtype);
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("readdir-failover", priv->readdir_failover, bool, out);
+ GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool,
+ out);
+
priv->wait_count = 1;
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
@@ -323,8 +380,6 @@ init (xlator_t *this)
AFR_XATTR_PREFIX,
trav->xlator->name);
if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
ret = -ENOMEM;
goto out;
}
@@ -333,6 +388,13 @@ init (xlator_t *this)
i++;
}
+ ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT,
+ this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
gf_afr_mt_int32_t);
if (!priv->last_event) {
@@ -340,16 +402,67 @@ init (xlator_t *this)
goto out;
}
- priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
- gf_afr_mt_afr_brick_pos_t);
- if (!priv->shd.pos) {
- ret = -ENOMEM;
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
goto out;
}
priv->first_lookup = 1;
priv->root_inode = NULL;
+ if (!priv->shd.iamshd) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = -ENOMEM;
+ priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
+ gf_afr_mt_brick_pos_t);
+ if (!priv->shd.pos)
+ goto out;
+
+ priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
+ gf_afr_mt_int32_t);
+ if (!priv->shd.pending)
+ goto out;
+
+ priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
+ child_count, gf_afr_mt_shd_bool_t);
+ if (!priv->shd.inprogress)
+ goto out;
+ priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
+ gf_afr_mt_shd_timer_t);
+ if (!priv->shd.timer)
+ goto out;
+
+ priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.healed)
+ goto out;
+
+ priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.heal_failed)
+ goto out;
+
+ priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.split_brain)
+ goto out;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+ priv->root_inode = inode_ref (this->itable->root);
+ GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out);
+ GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
+ ret = afr_initialise_statistics (this);
+ if (ret)
+ goto out;
ret = 0;
out:
return ret;
@@ -364,6 +477,8 @@ fini (xlator_t *this)
priv = this->private;
this->private = NULL;
afr_priv_destroy (priv);
+ if (this->itable);//I dont see any destroy func
+
return 0;
}
@@ -382,6 +497,9 @@ struct xlator_fops fops = {
.finodelk = afr_finodelk,
.entrylk = afr_entrylk,
.fentrylk = afr_fentrylk,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
/* inode read */
.access = afr_access,
@@ -389,6 +507,7 @@ struct xlator_fops fops = {
.fstat = afr_fstat,
.readlink = afr_readlink,
.getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
.readv = afr_readv,
/* inode write */
@@ -396,9 +515,11 @@ struct xlator_fops fops = {
.truncate = afr_truncate,
.ftruncate = afr_ftruncate,
.setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
.setattr = afr_setattr,
.fsetattr = afr_fsetattr,
.removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
/* dir read */
.opendir = afr_opendir,
@@ -431,33 +552,79 @@ struct xlator_cbks cbks = {
struct volume_options options[] = {
{ .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"
+ },
+ { .key = {"read-subvolume-index" },
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"
+ },
+ { .key = {"read-hash-mode" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 2,
+ .default_value = "0",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option"
+ "0 = first responder, "
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume), "
+ "2 = hash by GFID of file and client PID",
+ },
+ { .key = {"choose-local" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Choose a local subvolume(i.e. Brick) to read from if "
+ "read-subvolume is not explicitly set.",
},
{ .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "If a split-brain happens choose subvol/brick set by "
+ "this option as source."
},
{ .key = {"background-self-heal-count"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
.default_value = "16",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "This specifies the number of self-heals that can be "
+ " performed in background without blocking the fop"
},
{ .key = {"data-self-heal"},
.type = GF_OPTION_TYPE_STR,
- .default_value = "",
.value = {"1", "on", "yes", "true", "enable",
"0", "off", "no", "false", "disable",
"open"},
.default_value = "on",
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."
},
{ .key = {"data-self-heal-algorithm"},
.type = GF_OPTION_TYPE_STR,
- .default_value = "",
.description = "Select between \"full\", \"diff\". The "
"\"full\" algorithm copies the entire file from "
"source to sink. The \"diff\" algorithm copies to "
"sink only those blocks whose checksums don't match "
- "with those of source.",
- .value = { "diff", "full", "" }
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = { "diff", "full"}
},
{ .key = {"data-self-heal-window-size"},
.type = GF_OPTION_TYPE_INT,
@@ -470,26 +637,43 @@ struct volume_options options[] = {
{ .key = {"metadata-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."
},
{ .key = {"entry-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."
},
{ .key = {"data-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Data fops like write/truncate will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"metadata-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Metadata fops like setattr/setxattr will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"entry-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Entry fops like create/unlink will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"optimistic-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."
},
{ .key = {"strict-readdir"},
.type = GF_OPTION_TYPE_BOOL,
@@ -498,18 +682,55 @@ struct volume_options options[] = {
{ .key = {"inodelk-trace"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"
},
{ .key = {"entrylk-trace"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"
+ },
+ { .key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "aquistion fails on any server, then the held locks "
+ "are unlocked and revert to a blocking locked mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempt to acquire lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking lock as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimzed\" transaction."
+
},
{ .key = {"self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files"
+ "will not be performed if this option is turned off."
+ },
+ { .key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."
},
{ .key = {"quorum-type"},
.type = GF_OPTION_TYPE_STR,
- .value = { "none", "auto", "fixed", "" },
+ .value = { "none", "auto", "fixed"},
.default_value = "none",
.description = "If value is \"fixed\" only allow writes if "
"quorum-count bricks are present. If value is "
@@ -526,5 +747,47 @@ struct volume_options options[] = {
"this many bricks or present. Other quorum types "
"will OVERWRITE this value.",
},
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ { .key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 60,
+ .max = INT_MAX,
+ .default_value = "600",
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"
+ },
+ { .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ { .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .default_value = "1KB",
+ },
+ { .key = {"readdir-failover"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "readdir(p) will not failover if this option is off",
+ .default_value = "on",
+ },
+ { .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 8568f1fd8..21064db58 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -32,9 +23,15 @@
#include "afr-self-heal-algorithm.h"
#include "libxlator.h"
+#include "timer.h"
#define AFR_XATTR_PREFIX "trusted.afr"
#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
+
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
struct _pump_private;
@@ -43,8 +40,7 @@ typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
int32_t op_errno);
typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int child, int32_t op_error,
- int32_t op_errno);
+ int32_t op_error, int32_t op_errno);
typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
@@ -66,10 +62,8 @@ typedef enum {
AFR_INODE_SET_READ_CTX = 1,
AFR_INODE_RM_STALE_CHILDREN,
AFR_INODE_SET_OPENDIR_DONE,
- AFR_INODE_SET_SPLIT_BRAIN,
AFR_INODE_GET_READ_CTX,
AFR_INODE_GET_OPENDIR_DONE,
- AFR_INODE_GET_SPLIT_BRAIN,
} afr_inode_op_t;
typedef struct afr_inode_params_ {
@@ -83,16 +77,41 @@ typedef struct afr_inode_params_ {
} u;
} afr_inode_params_t;
+typedef enum afr_spb_state {
+ DONT_KNOW,
+ SPB,
+ NO_SPB
+} afr_spb_state_t;
+
typedef struct afr_inode_ctx_ {
uint64_t masks;
int32_t *fresh_children;//increasing order of latency
+ afr_spb_state_t mdata_spb;
+ afr_spb_state_t data_spb;
+ uint32_t open_fd_count;
} afr_inode_ctx_t;
+typedef enum {
+ NONE,
+ INDEX,
+ INDEX_TO_BE_HEALED,
+ FULL,
+} afr_crawl_type_t;
+
typedef struct afr_self_heald_ {
- gf_boolean_t enabled;
- gf_boolean_t pending;
- gf_boolean_t inprogress;
- afr_child_pos_t *pos;
+ gf_boolean_t enabled;
+ gf_boolean_t iamshd;
+ afr_crawl_type_t *pending;
+ gf_boolean_t *inprogress;
+ afr_child_pos_t *pos;
+ gf_timer_t **timer;
+ eh_t *healed;
+ eh_t *heal_failed;
+ eh_t *split_brain;
+ eh_t **statistics;
+ void **crawl_events;
+ char *node_uuid;
+ int timeout;
} afr_self_heald_t;
typedef struct _afr_private {
@@ -126,6 +145,7 @@ typedef struct _afr_private {
gf_boolean_t entry_change_log; /* on/off */
int read_child; /* read-subvolume */
+ unsigned int hash_mode; /* for when read_child is not set */
int favorite_child; /* subvolume to be preferred in resolving
split-brain cases */
@@ -146,14 +166,48 @@ typedef struct _afr_private {
struct list_head saved_fds; /* list of fds on which locks have succeeded */
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
+ uint32_t post_op_delay_secs;
unsigned int quorum_count;
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
afr_self_heald_t shd;
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ gf_boolean_t readdir_failover;
+ uint64_t sh_readdir_size;
+ gf_boolean_t ensure_durability;
+ char *sh_domain;
} afr_private_t;
+typedef enum {
+ AFR_SELF_HEAL_NOT_ATTEMPTED,
+ AFR_SELF_HEAL_STARTED,
+ AFR_SELF_HEAL_FAILED,
+ AFR_SELF_HEAL_SYNC_BEGIN,
+} afr_self_heal_status;
+
typedef struct {
+ afr_self_heal_status gfid_or_missing_entry_self_heal;
+ afr_self_heal_status metadata_self_heal;
+ afr_self_heal_status data_self_heal;
+ afr_self_heal_status entry_self_heal;
+} afr_sh_status_for_all_type;
+
+typedef enum {
+ AFR_SELF_HEAL_ENTRY,
+ AFR_SELF_HEAL_METADATA,
+ AFR_SELF_HEAL_DATA,
+ AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY,
+ AFR_SELF_HEAL_INVALID = -1,
+} afr_self_heal_type;
+
+typedef enum {
+ AFR_CHECK_ALL,
+ AFR_CHECK_SPECIFIC,
+} afr_sh_fail_check_type;
+
+struct afr_self_heal_ {
/* External interface: These are variables (some optional) that
are set by whoever has triggered self-heal */
@@ -162,6 +216,8 @@ typedef struct {
gf_boolean_t do_entry_self_heal;
gf_boolean_t do_gfid_self_heal;
gf_boolean_t do_missing_entry_self_heal;
+ gf_boolean_t force_confirm_spb; /* Check for split-brains even when
+ self-heal is turned off */
gf_boolean_t forced_merge; /* Is this a self-heal triggered to
forcibly merge the directories? */
@@ -179,7 +235,7 @@ typedef struct {
background, this function will be called as soon as possible. */
int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, int32_t sh_failed);
/* End of external interface members */
@@ -192,7 +248,6 @@ typedef struct {
afr_expunge_done_cbk_t expunge_done;
afr_impunge_done_cbk_t impunge_done;
- int32_t impunge_ret_child;
/* array of xattr's, one for each child */
dict_t **xattr;
@@ -226,13 +281,13 @@ typedef struct {
unsigned char *locked_nodes;
int lock_count;
- mode_t impunging_entry_mode;
const char *linkname;
gf_boolean_t entries_skipped;
- int op_failed;
-
+ gf_boolean_t actual_sh_started;
+ gf_boolean_t sync_done;
gf_boolean_t data_lock_held;
+ gf_boolean_t sh_dom_lock_held;
gf_boolean_t eof_reached;
fd_t *healing_fd;
int file_has_holes;
@@ -243,16 +298,22 @@ typedef struct {
uint8_t *checksum;
afr_post_remove_call_t post_remove_call;
- loc_t parent_loc;
+ char *data_sh_info;
+ char *metadata_sh_info;
+ loc_t parent_loc;
call_frame_t *orig_frame;
call_frame_t *old_loop_frame;
gf_boolean_t unwound;
afr_sh_algo_private_t *private;
+ afr_sh_status_for_all_type afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action;
+ struct afr_sh_algorithm *algo;
afr_lock_cbk_t data_lock_success_handler;
afr_lock_cbk_t data_lock_failure_handler;
+ gf_boolean_t data_lock_block;
int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
@@ -260,7 +321,9 @@ typedef struct {
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
call_frame_t *sh_frame;
-} afr_self_heal_t;
+};
+
+typedef struct afr_self_heal_ afr_self_heal_t;
typedef enum {
AFR_DATA_TRANSACTION, /* truncate, write, ... */
@@ -322,11 +385,31 @@ afr_index_for_transaction_type (afr_transaction_type type)
return -1; /* make gcc happy */
}
+typedef struct {
+ loc_t loc;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
+
+} afr_entry_lockee_t;
+
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2);
+
+typedef struct {
+ char *domain; /* Domain on which inodelk is taken */
+ struct gf_flock flock;
+ unsigned char *locked_nodes;
+ int32_t lock_count;
+} afr_inodelk_t;
typedef struct {
loc_t *lk_loc;
- struct gf_flock lk_flock;
+ int lockee_count;
+ afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+
+ afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
@@ -335,23 +418,22 @@ typedef struct {
unsigned char *locked_nodes;
unsigned char *lower_locked_nodes;
- unsigned char *inode_locked_nodes;
- unsigned char *entry_locked_nodes;
selfheal_lk_type_t selfheal_lk_type;
transaction_lk_type_t transaction_lk_type;
int32_t lock_count;
- int32_t inodelk_lock_count;
int32_t entrylk_lock_count;
uint64_t lock_number;
int32_t lk_call_count;
int32_t lk_expected_count;
+ int32_t lk_attempted_count;
int32_t lock_op_ret;
int32_t lock_op_errno;
afr_lock_cbk_t lock_cbk;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
} afr_internal_lock_t;
typedef struct _afr_locked_fd {
@@ -359,21 +441,29 @@ typedef struct _afr_locked_fd {
struct list_head list;
} afr_locked_fd_t;
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ int32_t op_errno;
+};
+
typedef struct _afr_local {
int uid;
int gid;
unsigned int call_count;
unsigned int success_count;
unsigned int enoent_count;
+ uint32_t open_fd_count;
+ gf_boolean_t update_open_fd_count;
- unsigned int govinda_gOvinda;
+ unsigned int unhealable;
unsigned int read_child_index;
unsigned char read_child_returned;
unsigned int first_up_child;
- pid_t saved_pid;
+ gf_lkowner_t saved_lk_owner;
int32_t op_ret;
int32_t op_errno;
@@ -384,7 +474,6 @@ typedef struct _afr_local {
loc_t newloc;
fd_t *fd;
- unsigned char *fd_open_on;
glusterfs_fop_t fop;
@@ -406,13 +495,25 @@ typedef struct _afr_local {
dict_t *dict;
int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
+
- gf_boolean_t fop_paused;
- int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
+ /* Is the current writev() going to perform a stable write?
+ i.e, is fd->flags or @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
- /*
- This struct contains the arguments for the "continuation"
- (scheme-like) of fops
+ /* This write appended to the file. Nnot necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
+
+ int allow_sh_for_running_transaction;
+
+
+ /* This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
*/
int op;
@@ -423,6 +524,7 @@ typedef struct _afr_local {
} statfs;
struct {
+ uint32_t parent_entrylk;
uuid_t gfid_req;
inode_t *inode;
struct iatt buf;
@@ -434,11 +536,13 @@ typedef struct _afr_local {
int32_t read_child;
int32_t *sources;
int32_t *success_children;
+ int32_t **pending_matrix;
+ gf_boolean_t fresh_lookup;
+ gf_boolean_t possible_spb;
} lookup;
struct {
int32_t flags;
- int32_t wbflags;
} open;
struct {
@@ -471,13 +575,14 @@ typedef struct _afr_local {
struct {
char *name;
int last_index;
- long pathinfo_len;
+ long xattr_len;
} getxattr;
struct {
size_t size;
off_t offset;
int last_index;
+ uint32_t flags;
} readv;
/* dir read */
@@ -495,7 +600,7 @@ typedef struct _afr_local {
int32_t op_errno;
size_t size;
off_t offset;
-
+ dict_t *dict;
gf_boolean_t failed;
int last_index;
} readdir;
@@ -504,44 +609,34 @@ typedef struct _afr_local {
struct {
struct iatt prebuf;
struct iatt postbuf;
+ } inode_wfop; //common structure for all inode-write-fops
+ struct {
int32_t op_ret;
struct iovec *vector;
struct iobref *iobref;
int32_t count;
off_t offset;
+ uint32_t flags;
} writev;
struct {
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync;
-
- struct {
off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
} truncate;
struct {
off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
} ftruncate;
struct {
struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
} setattr;
struct {
struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
} fsetattr;
struct {
@@ -550,96 +645,85 @@ typedef struct _afr_local {
} setxattr;
struct {
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
+
+ struct {
char *name;
} removexattr;
+ struct {
+ dict_t *xattr;
+ } xattrop;
+
+ struct {
+ dict_t *xattr;
+ } fxattrop;
+
/* dir write */
struct {
- fd_t *fd;
- dict_t *params;
- int32_t flags;
- mode_t mode;
inode_t *inode;
struct iatt buf;
struct iatt preparent;
struct iatt postparent;
- struct iatt read_child_buf;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; //common structure for all dir fops
+
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
} create;
struct {
dev_t dev;
mode_t mode;
dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
} mknod;
struct {
int32_t mode;
dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
} mkdir;
struct {
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink;
-
- struct {
- int flags;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
+ int flags;
} rmdir;
struct {
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preoldparent;
- struct iatt prenewparent;
- struct iatt postoldparent;
- struct iatt postnewparent;
- } rename;
-
- struct {
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } link;
-
- struct {
- inode_t *inode;
dict_t *params;
- struct iatt buf;
- struct iatt read_child_buf;
char *linkpath;
- struct iatt preparent;
- struct iatt postparent;
} symlink;
+ struct {
+ int32_t mode;
+ off_t offset;
+ size_t len;
+ } fallocate;
+
+ struct {
+ off_t offset;
+ size_t len;
+ } discard;
+
struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
+ off_t offset;
+ size_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
+
+
} cont;
struct {
off_t start, len;
+ gf_boolean_t eager_lock_on;
int *eager_lock;
char *basename;
@@ -650,12 +734,18 @@ typedef struct _afr_local {
afr_transaction_type type;
- int success_count;
- int erase_pending;
- int failure_count;
+ /* pre-compute the post piggyback status before
+ entering POST-OP phase
+ */
+ int *postop_piggybacked;
+
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
+
+ struct list_head eager_locked;
- int last_tried;
- int32_t *child_errno;
+ int32_t **txn_changelog;//changelog after pre+post ops
unsigned char *pre_op;
call_frame_t *main_frame;
@@ -674,6 +764,15 @@ typedef struct _afr_local {
afr_self_heal_t self_heal;
struct marker_str marker;
+
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
+
+ mode_t umask;
+ int xflag;
+ gf_boolean_t do_discovery;
+ struct afr_reply *replies;
} afr_local_t;
typedef enum {
@@ -683,11 +782,6 @@ typedef enum {
} afr_fd_open_status_t;
typedef struct {
- struct list_head call_list;
- call_frame_t *frame;
-} afr_fd_paused_call_t;
-
-typedef struct {
unsigned int *pre_op_done;
afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
unsigned int *pre_op_piggyback;
@@ -696,7 +790,6 @@ typedef struct {
unsigned int *lock_acquired;
int flags;
- int32_t wbflags;
uint64_t up_count; /* number of CHILD_UPs this fd has seen */
uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
@@ -707,20 +800,32 @@ typedef struct {
struct list_head entries; /* needed for readdir failover */
unsigned char *locked_on; /* which subvolumes locks have been successful */
- struct list_head paused_calls; /* queued calls while fix_open happens */
+
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
+ int call_child;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+
+ /* list of frames currently in progress */
+ struct list_head eager_locked;
} afr_fd_ctx_t;
/* try alloc and if it fails, goto label */
-#define ALLOC_OR_GOTO(var, type, label) do { \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_afr_mt_##type); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
+#define AFR_LOCAL_ALLOC_OR_GOTO(var, label) do { \
+ var = mem_get0 (THIS->local_pool); \
+ if (!var) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "out of memory :("); \
+ op_errno = ENOMEM; \
+ goto label; \
+ } \
} while (0);
@@ -741,8 +846,14 @@ int
pump_command_reply (call_frame_t *frame, xlator_t *this);
int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, ...);
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
+
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count);
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock);
int
afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
@@ -755,7 +866,7 @@ afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
unsigned char *locked_nodes);
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this);
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
int
afr_set_lock_number (call_frame_t *frame, xlator_t *this);
@@ -779,13 +890,16 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this);
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
-void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
unsigned int child_count);
int pump_start (call_frame_t *frame, xlator_t *this);
int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+
+int
afr_fd_ctx_set (xlator_t *this, fd_t *fd);
int32_t
@@ -795,8 +909,8 @@ void
afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
int32_t *fresh_children);
-void
-afr_build_parent_loc (loc_t *parent, loc_t *child);
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
unsigned int
afr_up_children_count (unsigned char *child_up, unsigned int child_count);
@@ -827,11 +941,12 @@ gf_boolean_t
afr_is_split_brain (xlator_t *this, inode_t *inode);
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set);
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb);
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags);
+ fd_t *fd, dict_t *xdata);
void
afr_set_opendir_done (xlator_t *this, inode_t *inode);
@@ -858,22 +973,27 @@ afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
frame->local = NULL; \
} \
STACK_UNWIND_STRICT (fop, frame, params); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
- } while (0);
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0)
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
} while (0);
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
/* allocate and return a string that is the basename of argument */
static inline char *
AFR_BASENAME (const char *str)
@@ -909,12 +1029,13 @@ afr_first_up_child (unsigned char *child_up, size_t child_count);
int
afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources);
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid);
void
afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child);
+ int32_t config_read_child, uuid_t gfid);
int32_t
afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
@@ -936,8 +1057,9 @@ afr_children_rm_child (int32_t *children, int32_t child,
int32_t child_count);
void
afr_reset_children (int32_t *children, int32_t child_count);
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno);
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio);
int
afr_errno_count (int32_t *children, int *child_errno,
unsigned int child_count, int32_t op_errno);
@@ -978,12 +1100,13 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
xlator_t *this),
int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno));
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open);
-int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop);
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed));
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open);
+
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this);
int
afr_set_elem_count_get (unsigned char *elems, int child_count);
@@ -1000,11 +1123,33 @@ void
afr_set_low_priority (call_frame_t *frame);
int
afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags, int32_t wb_flags);
+ int flags);
gf_boolean_t
afr_have_quorum (char *logname, afr_private_t *priv);
+void
+afr_matrix_cleanup (int32_t **pending, unsigned int m);
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n);
+
+gf_boolean_t
+afr_is_errno_set (int *child_errno, int child);
+
+gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child);
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd);
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count);
+void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count);
/*
* Special value indicating we should use the "auto" quorum method instead of
* a fixed value (including zero to turn off quorum enforcement).
@@ -1024,4 +1169,44 @@ afr_have_quorum (char *logname, afr_private_t *priv);
} \
} while (0);
+
+#define AFR_SBRAIN_MSG "Failed on %s as split-brain is seen. Returning EIO."
+
+#define AFR_SBRAIN_CHECK_FD(fd, label) do { \
+ if (fd->inode && afr_is_split_brain (this, fd->inode)) { \
+ op_errno = EIO; \
+ gf_log (this->name, GF_LOG_WARNING, \
+ AFR_SBRAIN_MSG ,uuid_utoa (fd->inode->gfid)); \
+ goto label; \
+ } \
+} while (0)
+
+#define AFR_SBRAIN_CHECK_LOC(loc, label) do { \
+ if (loc->inode && afr_is_split_brain (this, loc->inode)) { \
+ op_errno = EIO; \
+ loc_path (loc, NULL); \
+ gf_log (this->name, GF_LOG_WARNING, \
+ AFR_SBRAIN_MSG , loc->path); \
+ goto label; \
+ } \
+} while (0)
+
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
+
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this);
+
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 1814c8b9d..a7f72fb30 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -1,25 +1,17 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
+#include <fnmatch.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -28,8 +20,16 @@
#include "afr-common.c"
#include "defaults.c"
+#include "glusterfs.h"
static uint64_t pump_pid = 0;
+static inline void
+pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+{
+ afr_update_loc_gfids (loc, iatt, parent);
+ uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+}
+
static int
pump_mark_start_pending (xlator_t *this)
{
@@ -165,7 +165,7 @@ pump_save_path (xlator_t *this, const char *path)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
@@ -185,6 +185,7 @@ pump_save_path (xlator_t *this, const char *path)
dict_unref (dict);
+ loc_wipe (&loc);
return 0;
}
@@ -360,7 +361,7 @@ gf_pump_traverse_directory (loc_t *loc)
"pump opendir on %s returned=%d",
loc->path, ret);
- while (syncop_readdirp (this, fd, 131072, offset, &entries)) {
+ while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
free_entries = _gf_true;
if (list_empty (&entries.list)) {
@@ -382,8 +383,7 @@ gf_pump_traverse_directory (loc_t *loc)
}
loc_wipe (&entry_loc);
ret = afr_build_child_loc (this, &entry_loc, loc,
- entry->d_name,
- entry->d_stat.ia_gfid);
+ entry->d_name);
if (ret)
goto out;
@@ -405,7 +405,7 @@ gf_pump_traverse_directory (loc_t *loc)
entry_loc.path);
continue;
}
- afr_fill_loc_info (&entry_loc, &iatt,
+ pump_fill_loc_info (&entry_loc, &iatt,
&parent);
pump_update_resume_state (this, entry_loc.path);
@@ -428,7 +428,7 @@ gf_pump_traverse_directory (loc_t *loc)
gf_pump_traverse_directory (&entry_loc);
}
}
- }
+ }
}
gf_dirent_free (&entries);
@@ -439,6 +439,10 @@ gf_pump_traverse_directory (loc_t *loc)
}
+ ret = syncop_close (fd);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
+
if (is_directory_empty && IS_ROOT_PATH (loc->path)) {
pump_change_state (this, PUMP_STATE_RUNNING);
gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
@@ -478,7 +482,7 @@ pump_update_resume_path (xlator_t *this)
static int32_t
pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_private_t *priv = NULL;
loc_t loc = {0};
@@ -489,7 +493,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
ret = syncop_removexattr (priv->children[source], &loc,
PUMP_PATH);
@@ -505,6 +509,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
"failed with %s", strerror (errno));
}
+ loc_wipe (&loc);
return pump_command_reply (frame, this);
}
@@ -524,7 +529,7 @@ pump_complete_migration (xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
@@ -566,6 +571,7 @@ pump_complete_migration (xlator_t *this)
call_resume (pump_priv->cleaner);
}
+ loc_wipe (&loc);
return 0;
}
@@ -621,7 +627,7 @@ pump_task (void *data)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
xattr_req = dict_new ();
if (!xattr_req) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -659,6 +665,7 @@ out:
if (xattr_req)
dict_unref (xattr_req);
+ loc_wipe (&loc);
return 0;
}
@@ -692,7 +699,7 @@ pump_start (call_frame_t *pump_frame, xlator_t *this)
priv = this->private;
pump_priv = priv->pump_private;
- pump_frame->root->lk_owner = (uint64_t) (unsigned long)pump_frame->root;
+ afr_set_lk_owner (pump_frame, this, pump_frame->root);
pump_pid = (uint64_t) (unsigned long)pump_frame->root;
ret = synctask_new (pump_priv->env, pump_task,
@@ -706,8 +713,8 @@ pump_start (call_frame_t *pump_frame, xlator_t *this)
}
gf_log (this->name, GF_LOG_DEBUG,
- "setting pump as started lk_owner: %"PRIu64" %"PRIu64,
- pump_frame->root->lk_owner, pump_pid);
+ "setting pump as started lk_owner: %s %"PRIu64,
+ lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
priv->use_afr_in_pump = 1;
out:
@@ -741,7 +748,7 @@ pump_cmd_start_setxattr_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
call_frame_t *prev = NULL;
@@ -793,9 +800,9 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
- data = data_ref (dict_get (local->dict, PUMP_CMD_START));
+ data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
if (!data) {
ret = -1;
gf_log (this->name, GF_LOG_ERROR,
@@ -834,7 +841,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
PUMP_SINK_CHILD(this)->fops->setxattr,
&loc,
dict,
- 0);
+ 0, NULL);
ret = 0;
@@ -848,6 +855,7 @@ out:
if (ret && clnt_cmd)
GF_FREE (clnt_cmd);
+ loc_wipe (&loc);
return ret;
}
@@ -867,7 +875,7 @@ pump_cmd_start_getxattr_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_local_t *local = NULL;
char *path = NULL;
@@ -934,6 +942,7 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
uint64_t number_files = 0;
char filename[PATH_MAX];
+ char summary[PATH_MAX+256];
char *dict_str = NULL;
int32_t op_ret = 0;
@@ -962,16 +971,19 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
}
if (pump_priv->pump_finished) {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Migration complete ",
- number_files);
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64, number_files);
} else {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Current file= %s ",
- number_files, filename);
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64":current_file=%s",
+ number_files, filename);
}
+ snprintf (dict_str, PATH_MAX+256, "status=%d:%s",
+ (pump_priv->pump_finished)?1:0, summary);
dict = dict_new ();
- ret = dict_set_dynstr (dict, PUMP_CMD_STATUS, dict_str);
+ ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"dict_set_dynstr returned negative value");
@@ -983,13 +995,12 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
out:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
if (dict)
dict_unref (dict);
- if (dict_str)
- GF_FREE (dict_str);
+ GF_FREE (dict_str);
return 0;
}
@@ -1031,14 +1042,14 @@ pump_execute_start (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
STACK_WIND (frame,
pump_cmd_start_getxattr_cbk,
PUMP_SOURCE_CHILD(this),
PUMP_SOURCE_CHILD(this)->fops->getxattr,
&loc,
- PUMP_PATH);
+ PUMP_PATH, NULL);
ret = 0;
@@ -1048,6 +1059,7 @@ out:
pump_command_reply (frame, this);
}
+ loc_wipe (&loc);
return 0;
}
@@ -1055,7 +1067,7 @@ static int
pump_cleanup_helper (void *data) {
call_frame_t *frame = data;
- pump_xattr_cleaner (frame, 0, frame->this, 0, 0);
+ pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
return 0;
}
@@ -1137,7 +1149,7 @@ pump_execute_abort (call_frame_t *frame, xlator_t *this)
} else {
pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
pump_xattr_cleaner,
- 0, 0);
+ 0, 0, NULL);
}
return 0;
@@ -1150,7 +1162,7 @@ pump_command_status (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_STATUS, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_STATUS, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump status command");
@@ -1174,7 +1186,7 @@ pump_command_pause (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_PAUSE, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_PAUSE, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump pause command");
@@ -1198,7 +1210,7 @@ pump_command_commit (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_COMMIT, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_COMMIT, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump commit command");
@@ -1222,7 +1234,7 @@ pump_command_abort (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_ABORT, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_ABORT, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump abort command");
@@ -1246,7 +1258,7 @@ pump_command_start (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_START, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_START, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump start command");
@@ -1268,7 +1280,7 @@ struct _xattr_key {
struct list_head list;
};
-static void
+static int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -1280,13 +1292,14 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
static void
@@ -1314,7 +1327,7 @@ __filter_xattrs (dict_t *dict)
int32_t
pump_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -1349,7 +1362,7 @@ pump_getxattr_cbk (call_frame_t *frame, void *cookie,
children[next_call_child],
children[next_call_child]->fops->getxattr,
&local->loc,
- local->cont.getxattr.name);
+ local->cont.getxattr.name, NULL);
}
out:
@@ -1357,7 +1370,7 @@ out:
if (op_ret >= 0 && dict)
__filter_xattrs (dict);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
}
return 0;
@@ -1365,7 +1378,7 @@ out:
int32_t
pump_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
afr_private_t * priv = NULL;
xlator_t ** children = NULL;
@@ -1388,12 +1401,12 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_getxattr_cbk,
FIRST_CHILD (this),
(FIRST_CHILD (this))->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
- ALLOC_OR_GOTO (frame->local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
ret = afr_local_init (local, priv, &op_errno);
@@ -1408,7 +1421,7 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- if (!strcmp (name, PUMP_CMD_STATUS)) {
+ if (!strcmp (name, RB_PUMP_CMD_STATUS)) {
gf_log (this->name, GF_LOG_DEBUG,
"Hit pump command - status");
pump_execute_status (frame, this);
@@ -1442,12 +1455,12 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
(void *) (long) call_child,
children[call_child], children[call_child]->fops->getxattr,
- loc, name);
+ loc, name, xdata);
ret = 0;
out:
if (ret < 0)
- AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1469,14 +1482,14 @@ afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
+ local->op_ret, local->op_errno, NULL);
}
return 0;
}
static int
afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -1545,7 +1558,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->setxattr,
&local->loc,
local->cont.setxattr.dict,
- local->cont.setxattr.flags);
+ local->cont.setxattr.flags, NULL);
if (!--call_count)
break;
@@ -1573,11 +1586,9 @@ pump_setxattr_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
+ AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -1595,12 +1606,10 @@ pump_command_reply (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_INFO,
"Command succeeded");
- dict_unref (local->dict);
-
AFR_STACK_UNWIND (setxattr,
frame,
local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
return 0;
}
@@ -1637,31 +1646,32 @@ pump_parse_command (call_frame_t *frame, xlator_t *this,
int
pump_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
-
int ret = -1;
-
int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (this->private, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict,
+ op_errno, out);
+
priv = this->private;
if (!priv->use_afr_in_pump) {
STACK_WIND (frame, default_setxattr_cbk,
FIRST_CHILD (this),
(FIRST_CHILD (this))->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
ret = afr_local_init (local, priv, &op_errno);
if (ret < 0) {
@@ -1710,7 +1720,7 @@ out:
if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
}
return 0;
@@ -1744,7 +1754,7 @@ static int32_t
pump_truncate (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1754,11 +1764,11 @@ pump_truncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
loc,
- offset);
+ offset, xdata);
return 0;
}
- afr_truncate (frame, this, loc, offset);
+ afr_truncate (frame, this, loc, offset, xdata);
return 0;
}
@@ -1767,7 +1777,7 @@ static int32_t
pump_ftruncate (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1777,11 +1787,11 @@ pump_ftruncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
fd,
- offset);
+ offset, xdata);
return 0;
}
- afr_ftruncate (frame, this, fd, offset);
+ afr_ftruncate (frame, this, fd, offset, xdata);
return 0;
}
@@ -1790,7 +1800,7 @@ pump_ftruncate (call_frame_t *frame,
int
pump_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *parms)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1798,10 +1808,10 @@ pump_mknod (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, parms);
+ loc, mode, rdev, umask, xdata);
return 0;
}
- afr_mknod (frame, this, loc, mode, rdev, parms);
+ afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
return 0;
}
@@ -1810,7 +1820,7 @@ pump_mknod (call_frame_t *frame, xlator_t *this,
int
pump_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1818,10 +1828,10 @@ pump_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
- afr_mkdir (frame, this, loc, mode, params);
+ afr_mkdir (frame, this, loc, mode, umask, xdata);
return 0;
}
@@ -1830,7 +1840,7 @@ pump_mkdir (call_frame_t *frame, xlator_t *this,
static int32_t
pump_unlink (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1839,10 +1849,10 @@ pump_unlink (call_frame_t *frame,
default_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
- afr_unlink (frame, this, loc);
+ afr_unlink (frame, this, loc, xflag, xdata);
return 0;
}
@@ -1850,7 +1860,7 @@ pump_unlink (call_frame_t *frame,
static int
pump_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
@@ -1860,11 +1870,11 @@ pump_rmdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
+ loc, flags, xdata);
return 0;
}
- afr_rmdir (frame, this, loc, flags);
+ afr_rmdir (frame, this, loc, flags, xdata);
return 0;
}
@@ -1873,7 +1883,7 @@ pump_rmdir (call_frame_t *frame, xlator_t *this,
int
pump_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1881,10 +1891,10 @@ pump_symlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
+ linkpath, loc, umask, xdata);
return 0;
}
- afr_symlink (frame, this, linkpath, loc, params);
+ afr_symlink (frame, this, linkpath, loc, umask, xdata);
return 0;
}
@@ -1894,7 +1904,7 @@ static int32_t
pump_rename (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1903,10 +1913,10 @@ pump_rename (call_frame_t *frame,
default_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
- afr_rename (frame, this, oldloc, newloc);
+ afr_rename (frame, this, oldloc, newloc, xdata);
return 0;
}
@@ -1916,7 +1926,7 @@ static int32_t
pump_link (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1925,10 +1935,10 @@ pump_link (call_frame_t *frame,
default_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
- afr_link (frame, this, oldloc, newloc);
+ afr_link (frame, this, oldloc, newloc, xdata);
return 0;
}
@@ -1937,7 +1947,7 @@ pump_link (call_frame_t *frame,
static int32_t
pump_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1945,10 +1955,10 @@ pump_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
- afr_create (frame, this, loc, flags, mode, fd, params);
+ afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
return 0;
}
@@ -1958,8 +1968,7 @@ static int32_t
pump_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd,
- int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1968,10 +1977,10 @@ pump_open (call_frame_t *frame,
default_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
- afr_open (frame, this, loc, flags, fd, wbflags);
+ afr_open (frame, this, loc, flags, fd, xdata);
return 0;
}
@@ -1983,8 +1992,8 @@ pump_writev (call_frame_t *frame,
fd_t *fd,
struct iovec *vector,
int32_t count,
- off_t off,
- struct iobref *iobref)
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1996,20 +2005,20 @@ pump_writev (call_frame_t *frame,
fd,
vector,
count,
- off,
- iobref);
+ off, flags,
+ iobref, xdata);
return 0;
}
- afr_writev (frame, this, fd, vector, count, off, iobref);
- return 0;
+ afr_writev (frame, this, fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
static int32_t
pump_flush (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2018,10 +2027,10 @@ pump_flush (call_frame_t *frame,
default_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
- afr_flush (frame, this, fd);
+ afr_flush (frame, this, fd, xdata);
return 0;
}
@@ -2031,7 +2040,7 @@ static int32_t
pump_fsync (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2041,10 +2050,10 @@ pump_fsync (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
fd,
- flags);
+ flags, xdata);
return 0;
}
- afr_fsync (frame, this, fd, flags);
+ afr_fsync (frame, this, fd, flags, xdata);
return 0;
}
@@ -2053,7 +2062,7 @@ pump_fsync (call_frame_t *frame,
static int32_t
pump_opendir (call_frame_t *frame,
xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2062,10 +2071,10 @@ pump_opendir (call_frame_t *frame,
default_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
- afr_opendir (frame, this, loc, fd);
+ afr_opendir (frame, this, loc, fd, xdata);
return 0;
}
@@ -2075,7 +2084,7 @@ static int32_t
pump_fsyncdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2085,10 +2094,10 @@ pump_fsyncdir (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
fd,
- flags);
+ flags, xdata);
return 0;
}
- afr_fsyncdir (frame, this, fd, flags);
+ afr_fsyncdir (frame, this, fd, flags, xdata);
return 0;
}
@@ -2099,7 +2108,7 @@ pump_xattrop (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2110,10 +2119,10 @@ pump_xattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->xattrop,
loc,
flags,
- dict);
+ dict, xdata);
return 0;
}
- afr_xattrop (frame, this, loc, flags, dict);
+ afr_xattrop (frame, this, loc, flags, dict, xdata);
return 0;
}
@@ -2123,7 +2132,7 @@ pump_fxattrop (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2134,10 +2143,10 @@ pump_fxattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->fxattrop,
fd,
flags,
- dict);
+ dict, xdata);
return 0;
}
- afr_fxattrop (frame, this, fd, flags, dict);
+ afr_fxattrop (frame, this, fd, flags, dict, xdata);
return 0;
}
@@ -2147,9 +2156,17 @@ static int32_t
pump_removexattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
+ name, op_errno, out);
+
+ op_errno = 0;
priv = this->private;
if (!priv->use_afr_in_pump) {
STACK_WIND (frame,
@@ -2157,10 +2174,14 @@ pump_removexattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
loc,
- name);
+ name, xdata);
return 0;
}
- afr_removexattr (frame, this, loc, name);
+ afr_removexattr (frame, this, loc, name, xdata);
+
+ out:
+ if (op_errno)
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -2172,7 +2193,7 @@ pump_readdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t off)
+ off_t off, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2181,21 +2202,18 @@ pump_readdir (call_frame_t *frame,
default_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
+ fd, size, off, xdata);
return 0;
}
- afr_readdir (frame, this, fd, size, off);
+ afr_readdir (frame, this, fd, size, off, xdata);
return 0;
}
static int32_t
-pump_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
+pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *dict)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2204,10 +2222,10 @@ pump_readdirp (call_frame_t *frame,
default_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
+ fd, size, off, dict);
return 0;
}
- afr_readdirp (frame, this, fd, size, off);
+ afr_readdirp (frame, this, fd, size, off, dict);
return 0;
}
@@ -2238,13 +2256,24 @@ pump_release (xlator_t *this,
}
+static int32_t
+pump_forget (xlator_t *this, inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_forget (this, inode);
+
+ return 0;
+}
static int32_t
pump_setattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2253,10 +2282,10 @@ pump_setattr (call_frame_t *frame,
default_setattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
- afr_setattr (frame, this, loc, stbuf, valid);
+ afr_setattr (frame, this, loc, stbuf, valid, xdata);
return 0;
}
@@ -2267,7 +2296,7 @@ pump_fsetattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2276,10 +2305,10 @@ pump_fsetattr (call_frame_t *frame,
default_fsetattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
- afr_fsetattr (frame, this, fd, stbuf, valid);
+ afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
return 0;
}
@@ -2328,7 +2357,7 @@ notify (xlator_t *this, int32_t event,
child_xl = (xlator_t *) data;
- ret = afr_notify (this, event, data);
+ ret = afr_notify (this, event, data, NULL);
switch (event) {
case GF_EVENT_CHILD_DOWN:
@@ -2379,7 +2408,10 @@ init (xlator_t *this)
"Volume is dangling.");
}
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
priv = this->private;
LOCK_INIT (&priv->lock);
@@ -2405,14 +2437,13 @@ init (xlator_t *this)
priv->metadata_self_heal = 1;
priv->entry_self_heal = 1;
- priv->data_self_heal_algorithm = "";
-
priv->data_self_heal_window_size = 16;
priv->data_change_log = 1;
priv->metadata_change_log = 1;
priv->entry_change_log = 1;
priv->use_afr_in_pump = 1;
+ priv->sh_readdir_size = 65536;
/* Locking options */
@@ -2470,6 +2501,12 @@ init (xlator_t *this)
i++;
}
+ ret = gf_asprintf (&priv->sh_domain, "%s-self-heal", this->name);
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
priv->first_lookup = 1;
priv->root_inode = NULL;
@@ -2501,7 +2538,7 @@ init (xlator_t *this)
goto out;
}
- pump_priv->env = syncenv_new (0);
+ pump_priv->env = this->ctx->env;
if (!pump_priv->env) {
gf_log (this->name, GF_LOG_ERROR,
"Could not create new sync-environment");
@@ -2509,6 +2546,15 @@ init (xlator_t *this)
goto out;
}
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
priv->pump_private = pump_priv;
pump_change_state (this, PUMP_STATE_ABORT);
@@ -2533,9 +2579,6 @@ fini (xlator_t *this)
if (!pump_priv)
goto afr_priv;
- if (pump_priv->env)
- syncenv_destroy (pump_priv->env);
-
GF_FREE (pump_priv->resume_path);
LOCK_DESTROY (&pump_priv->resume_path_lock);
LOCK_DESTROY (&pump_priv->pump_state_lock);
@@ -2590,6 +2633,7 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
.release = pump_release,
.releasedir = pump_releasedir,
+ .forget = pump_forget,
};
struct volume_options options[] = {
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
index 9d60e5bea..bc4c31a78 100644
--- a/xlators/cluster/afr/src/pump.h
+++ b/xlators/cluster/afr/src/pump.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __PUMP_H__
@@ -26,12 +17,6 @@
#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
-#define PUMP_CMD_START "trusted.glusterfs.pump.start"
-#define PUMP_CMD_COMMIT "trusted.glusterfs.pump.commit"
-#define PUMP_CMD_ABORT "trusted.glusterfs.pump.abort"
-#define PUMP_CMD_PAUSE "trusted.glusterfs.pump.pause"
-#define PUMP_CMD_STATUS "trusted.glusterfs.pump.status"
-
#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index e35058d65..174bea841 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -4,7 +4,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
- dht-common.c dht-inode-write.c dht-inode-read.c \
+ dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
$(top_builddir)/xlators/lib/src/libxlator.c
dht_la_SOURCES = $(dht_common_source) dht.c
@@ -12,22 +12,23 @@ dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module -avoidversion
+dht_la_LDFLAGS = -module -avoid-version
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LDFLAGS = -module -avoid-version
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module -avoidversion
+switch_la_LDFLAGS = -module -avoid-version
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = dht-common.h dht-mem-types.h \
$(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
CLEANFILES =
uninstall-local:
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index fb149e763..8f61339e6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -31,17 +22,18 @@
#include "dht-common.h"
#include "defaults.h"
#include "byte-order.h"
+#include "glusterfs-acl.h"
#include <sys/time.h>
#include <libgen.h>
-void
+int
dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
{
dict_t *dst = NULL;
int64_t *ptr = 0, *size = NULL;
int32_t ret = -1;
- data_pair_t *data_pair = NULL;
+ data_t *dict_data = NULL;
dst = data;
@@ -53,32 +45,37 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
if (size == NULL) {
gf_log ("dht", GF_LOG_WARNING,
"memory allocation failed");
- return;
+ return -1;
}
ret = dict_set_bin (dst, key, size, sizeof (int64_t));
if (ret < 0) {
gf_log ("dht", GF_LOG_WARNING,
"dht aggregate dict set failed");
GF_FREE (size);
- return;
+ return -1;
}
}
ptr = data_to_bin (value);
if (ptr == NULL) {
gf_log ("dht", GF_LOG_WARNING, "data to bin failed");
- return;
+ return -1;
}
*size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ ret = gf_get_min_stime (THIS, dst, key, value);
+ if (ret < 0)
+ return ret;
} else {
/* compare user xattrs only */
if (!strncmp (key, "user.", strlen ("user."))) {
- ret = dict_lookup (dst, key, &data_pair);
- if (!ret && data) {
- ret = is_data_equal (data_pair->value, value);
+ ret = dict_lookup (dst, key, &dict_data);
+ if (!ret && dict_data && value) {
+ ret = is_data_equal (dict_data, value);
if (!ret)
- gf_log ("dht", GF_LOG_WARNING,
+ gf_log ("dht", GF_LOG_DEBUG,
"xattr mismatch for %s", key);
}
}
@@ -87,7 +84,7 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed");
}
- return;
+ return 0;
}
@@ -114,7 +111,7 @@ out:
int
dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -134,7 +131,10 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
ret = dht_layout_set (this, local->inode, layout);
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
@@ -147,6 +147,256 @@ out:
int
+dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = discover_frame->local;
+ layout = local->layout;
+ conf = this->private;
+
+ LOCK(&discover_frame->lock);
+ {
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ }
+ UNLOCK(&discover_frame->lock);
+
+ if (!main_frame)
+ return 0;
+
+ if (local->file_count && local->dir_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (local->cached_subvol) {
+ ret = dht_layout_preset (this, local->cached_subvol,
+ local->inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set layout for subvolume %s",
+ local->cached_subvol ? local->cached_subvol->name : "<nil>");
+ op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = dht_layout_normalize (this, &local->loc, layout);
+ if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+ /* either the layout is incorrect or the directory is
+ * not found even in one subvolume.
+ */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "normalizing failed on %s "
+ "(overlaps/holes present: %s, "
+ "ENOENT errors: %d)", local->loc.path,
+ (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
+ if ((ret > 0) && (ret == conf->subvolume_cnt)) {
+ op_errno = ESTALE;
+ goto out;
+ }
+ }
+
+ if (local->inode)
+ dht_layout_set (this, local->inode, layout);
+ }
+
+ DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+out:
+ DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return ret;
+}
+
+
+int
+dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int attempt_unwind = 0;
+ dht_conf_t *conf = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on %s",
+ local->loc.path, prev->this->name);
+ }
+
+
+ LOCK (&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
+
+ goto unlock;
+ }
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (is_dir) {
+ local->dir_count ++;
+ } else {
+ local->file_count ++;
+
+ if (!is_linkfile) {
+ /* real file */
+ local->cached_subvol = prev->this;
+ attempt_unwind = 1;
+ } else {
+ goto unlock;
+ }
+ }
+
+ local->op_ret = 0;
+
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
+
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt) || attempt_unwind) {
+ dht_discover_complete (this, frame);
+ }
+
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->link_xattr_name);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ uuid_copy (local->gfid, loc->gfid);
+
+ discover_frame = copy_frame (frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ discover_frame->local = local;
+ frame->local = NULL;
+ local->main_frame = frame;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (discover_frame, dht_discover_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+
+int
dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, dict_t *xattr,
@@ -191,7 +441,7 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_ret, op_errno, xattr);
if (op_ret == -1) {
- local->op_errno = ENOENT;
+ local->op_errno = op_errno;
gf_log (this->name, GF_LOG_DEBUG,
"lookup of %s on %s returned error (%s)",
local->loc.path, prev->this->name,
@@ -250,6 +500,11 @@ unlock:
dht_layout_set (this, local->inode, layout);
}
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
@@ -281,6 +536,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
int is_dir = 0;
int is_linkfile = 0;
+ call_frame_t *copy = NULL;
+ dht_local_t *copy_local = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, err);
@@ -339,7 +596,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
layout = local->layout;
is_dir = check_is_dir (inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
if (is_linkfile) {
gf_log (this->name, GF_LOG_INFO,
@@ -351,6 +609,23 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (is_dir) {
+ ret = dht_dir_has_layout (xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->stbuf.ia_ctime,
+ local->stbuf.ia_ctime_nsec,
+ stbuf->ia_ctime,
+ stbuf->ia_ctime_nsec)) {
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+ }
+ }
+ if (local->stbuf.ia_type != IA_INVAL)
+ {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid)) {
+ local->need_selfheal = 1;
+ }
+ }
ret = dht_layout_dir_mismatch (this, layout,
prev->this, &local->loc,
xattr);
@@ -389,7 +664,28 @@ out:
&& (conf && conf->unhashed_sticky_bit)) {
local->stbuf.ia_prot.sticky = 1;
}
- if (local->layout_mismatch) {
+ if (local->need_selfheal) {
+ local->need_selfheal = 0;
+ uuid_copy (local->gfid, local->stbuf.ia_gfid);
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ copy = create_frame (this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init (copy, &local->loc,
+ NULL, 0);
+ if (!copy_local)
+ goto cont;
+ copy_local->stbuf = local->stbuf;
+ copy->local = copy_local;
+ FRAME_SU_DO (copy, dht_local_t);
+ ret = synctask_new (this->ctx->env,
+ dht_dir_attr_heal,
+ dht_dir_attr_heal_done,
+ copy, copy);
+ }
+ }
+cont:
+ if (local->layout_mismatch) {
/* Found layout mismatch in the directory, need to
fix this in the inode context */
dht_layout_unref (this, local->layout);
@@ -415,7 +711,10 @@ out:
local->op_errno = ESTALE;
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
@@ -433,7 +732,8 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -466,8 +766,14 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
local->stbuf.ia_prot.sticky = 1;
}
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+
unwind:
- WIPE (&local->postparent);
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
@@ -540,7 +846,10 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
"<nil>"));
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret,
@@ -570,7 +879,10 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret,
@@ -586,7 +898,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
hashed_subvol->name);
ret = dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk,
+ dht_lookup_linkfile_create_cbk, this,
cached_subvol, hashed_subvol, &local->loc);
return ret;
@@ -596,7 +908,8 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
int
dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
int this_call_cnt = 0;
@@ -623,8 +936,9 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t *subvol = NULL;
loc_t *loc = NULL;
xlator_t *link_subvol = NULL;
- int ret = -1;
- int32_t fd_count = 0;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, out);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -634,6 +948,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
loc = &local->loc;
+ conf = this->private;
prev = cookie;
subvol = prev->this;
@@ -655,7 +970,8 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc->path, prev->this->name);
}
- is_linkfile = check_is_linkfile (inode, buf, xattr);
+ is_linkfile = check_is_linkfile (inode, buf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, buf, xattr);
if (is_linkfile) {
@@ -715,7 +1031,7 @@ unlock:
"deleting stale linkfile %s on %s",
loc->path, subvol->name);
STACK_WIND (frame, dht_lookup_unlink_cbk,
- subvol, subvol->fops->unlink, loc);
+ subvol, subvol->fops->unlink, loc, 0, NULL);
return 0;
}
}
@@ -797,7 +1113,16 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) failed (%s)",
local->loc.path, subvol->name, strerror (op_errno));
- goto err;
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
}
if (check_is_dir (inode, stbuf, xattr)) {
@@ -807,7 +1132,7 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
goto err;
}
- if (check_is_linkfile (inode, stbuf, xattr)) {
+ if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) reached link",
local->loc.path, subvol->name);
@@ -835,9 +1160,12 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
op_errno = EINVAL;
}
-unwind:
- WIPE (postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+unwind:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
postparent);
@@ -919,7 +1247,6 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
call_frame_t *prev = NULL;
int ret = 0;
- uint64_t tmp_layout = 0;
dht_layout_t *parent_layout = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
@@ -950,8 +1277,10 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
(loc->parent)) {
- ret = inode_ctx_get (loc->parent, this, &tmp_layout);
- parent_layout = (dht_layout_t *)(long)tmp_layout;
+ ret = dht_inode_ctx_layout_get (loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout)
+ goto out;
if (parent_layout->search_unhashed) {
local->op_errno = ENOENT;
dht_lookup_everywhere (frame, this, loc);
@@ -980,7 +1309,8 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
if (!is_linkfile) {
/* non-directory and not a linkfile */
@@ -1020,7 +1350,10 @@ out:
* from each of the subvolume. See dht_iatt_merge for reference.
*/
- WIPE (postparent);
+ if (!op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
@@ -1029,6 +1362,39 @@ err:
return 0;
}
+/* For directories, check if acl xattrs have been requested (by the acl xlator),
+ * if not, request for them. These xattrs are needed for dht dir self-heal to
+ * perform proper self-healing of dirs
+ */
+void
+dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+{
+ int ret = 0;
+
+ GF_ASSERT (inode);
+ GF_ASSERT (xattr_req);
+
+ if (inode->ia_type != IA_IFDIR)
+ return;
+
+ if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
+
+ if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
+
+ return;
+}
int
dht_lookup (call_frame_t *frame, xlator_t *this,
@@ -1049,7 +1415,6 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
conf = this->private;
if (!conf)
@@ -1086,6 +1451,12 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
local->xattr_req = dict_new ();
}
+ if (uuid_is_null (loc->pargfid) && !uuid_is_null (loc->gfid) &&
+ !__is_root_gfid (loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_discover (frame, this, loc);
+ return 0;
+ }
if (!hashed_subvol)
hashed_subvol = dht_subvol_get_hashed (this, loc);
@@ -1118,7 +1489,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
* revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (IA_ISDIR (local->inode->ia_type)) {
local->call_cnt = call_cnt = conf->subvolume_cnt;
@@ -1138,6 +1509,9 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
ret = dict_set_uint32 (local->xattr_req,
GLUSTERFS_OPEN_FD_COUNT, 4);
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+
for (i = 0; i < call_cnt; i++) {
subvol = layout->list[i].xlator;
@@ -1150,16 +1524,19 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
do_fresh_lookup:
/* TODO: remove the hard-coding */
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
ret = dict_set_uint32 (local->xattr_req,
- DHT_LINKFILE_KEY, 256);
+ conf->link_xattr_name, 256);
/* need it for self-healing linkfiles which is
'in-migration' state */
ret = dict_set_uint32 (local->xattr_req,
GLUSTERFS_OPEN_FD_COUNT, 4);
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no subvolume in layout for path=%s, "
@@ -1193,7 +1570,8 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -1201,7 +1579,7 @@ err:
int
dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1225,14 +1603,18 @@ dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent = *postparent;
local->preparent = *preparent;
- WIPE (&local->postparent);
- WIPE (&local->preparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
}
unlock:
UNLOCK (&frame->lock);
DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -1241,7 +1623,7 @@ unlock:
int
dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1253,7 +1635,8 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
- if (op_ret == -1) {
+ if ((op_ret == -1) && !((op_errno == ENOENT) ||
+ (op_errno == ENOTCONN))) {
local->op_errno = op_errno;
gf_log (this->name, GF_LOG_DEBUG,
"subvolume %s returned -1 (%s)",
@@ -1266,7 +1649,7 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unlock:
UNLOCK (&frame->lock);
- if (op_ret == -1)
+ if (local->op_ret == -1)
goto err;
cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
@@ -1280,53 +1663,19 @@ unlock:
STACK_WIND (frame, dht_unlink_cbk,
cached_subvol, cached_subvol->fops->unlink,
- &local->loc);
+ &local->loc, local->flags, NULL);
return 0;
err:
DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
-static int
-dht_ufo_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
- }
-unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-
int
dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -1352,7 +1701,8 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, NULL);
}
return 0;
@@ -1375,114 +1725,249 @@ fill_layout_info (dht_layout_t *layout, char *buf)
}
}
+void
+dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
+ char *xattr_buf, int32_t alloc_len,
+ int flag, char *layout_buf)
+{
+ if (flag && local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name,
+ layout_buf);
+ else if (local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "(<"DHT_PATHINFO_HEADER"%s> %s)",
+ this->name, local->xattr_val);
+ else if (flag)
+ snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
+ this->name, layout_buf);
+}
+
int
-dht_pathinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
+{
+ int ret = -1;
+ char *value = NULL;
+ int32_t plen = 0;
+
+ ret = dict_get_str (xattr, local->xsel, &value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Subvolume %s returned -1 (%s)", this->name,
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->alloc_len += strlen(value);
+
+ if (!local->xattr_val) {
+ local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (local->xattr_val) {
+ plen = strlen (local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC (local->xattr_val,
+ local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ (void) strcat (local->xattr_val, value);
+ (void) strcat (local->xattr_val, " ");
+ local->op_ret = 0;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int
+dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
+{
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {0,};
+
+ if (flag)
+ fill_layout_info (local->layout, layout_buf);
+
+ *dict = dict_new ();
+ if (!*dict)
+ goto out;
+
+ local->xattr_val[strlen (local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we dont have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen (this->name))
+ + strlen (layout_buf)
+ + 40;
+ xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
+
+ if (XATTR_IS_PATHINFO (local->xsel)) {
+ (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
+ local->alloc_len, flag,
+ layout_buf);
+ } else if (XATTR_IS_NODE_UUID (local->xsel)) {
+ (void) snprintf (xattr_buf, local->alloc_len, "%s",
+ local->xattr_val);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown local->xsel (%s)", local->xsel);
+ goto out;
+ }
+
+ ret = dict_set_dynstr (*dict, local->xsel, xattr_buf);
+ GF_FREE (local->xattr_val);
+
+ out:
+ return ret;
+}
+
+int
+dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- dht_local_t *local = NULL;
int ret = 0;
- int flag = 0;
+ dht_local_t *local = NULL;
int this_call_cnt = 0;
- char *value_got = NULL;
- char layout_buf[8192] = {0,};
- char *xattr_buf = NULL;
dict_t *dict = NULL;
- int32_t alloc_len = 0;
- int32_t plen = 0;
- local = frame->local;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
- if (op_ret != -1) {
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value_got);
- if (!ret) {
- alloc_len = strlen (value_got);
+ local = frame->local;
- /**
- * allocate the buffer:- we allocate 10 bytes extra in case we need to
- * append ' Link: ' in the buffer for another STACK_WIND
- */
- if (!local->pathinfo) {
- alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
- local->pathinfo = GF_CALLOC (alloc_len, sizeof (char), gf_common_mt_char);
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ if (op_errno != ENOTCONN) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr err (%s) for dir",
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
}
- if (local->pathinfo) {
- plen = strlen (local->pathinfo);
- if (plen) {
- /* extra byte(s) for \0 to be safe */
- alloc_len += (plen + 2);
- local->pathinfo = GF_REALLOC (local->pathinfo,
- alloc_len);
- if (!local->pathinfo)
- goto out;
- }
-
- strcat (local->pathinfo, value_got);
- }
+ goto unlock;
}
+
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
}
+ unlock:
+ UNLOCK (&frame->lock);
- out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->layout->cnt > 1) {
- /* Set it for directory */
- fill_layout_info (local->layout, layout_buf);
- flag = 1;
- }
+ if (!is_last_call (this_call_cnt))
+ goto out;
+
+ /* -- last call: do patch ups -- */
- dict = dict_new ();
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- /* we would need max-to-max this many bytes to create pathinfo string */
- alloc_len += (2 * strlen (this->name)) + strlen (layout_buf) + 40;
- xattr_buf = GF_CALLOC (alloc_len, sizeof (char), gf_common_mt_char);
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
- if (flag && local->pathinfo)
- snprintf (xattr_buf, alloc_len, "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
- this->name, local->pathinfo, this->name,
- layout_buf);
- else if (local->pathinfo)
- snprintf (xattr_buf, alloc_len, "(<"DHT_PATHINFO_HEADER"%s> %s)",
- this->name, local->pathinfo);
- else if (flag)
- snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
- this->name, layout_buf);
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
- ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY,
- xattr_buf);
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
+ out:
+ return 0;
+}
- if (local->pathinfo)
- GF_FREE (local->pathinfo);
+int
+dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ call_frame_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ local = frame->local;
+ prev = cookie;
- if (dict)
- dict_unref (dict);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 "
+ "(%s)", prev->this->name, strerror (op_errno));
+ goto unwind;
+ }
- return 0;
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
+ goto unwind;
}
- if (local->pathinfo)
- strcat (local->pathinfo, " Link: ");
- if (local->hashed_subvol) {
- /* This will happen if there pending */
- STACK_WIND (frame, dht_pathinfo_getxattr_cbk, local->hashed_subvol,
- local->hashed_subvol->fops->getxattr,
- &local->loc, local->key);
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
- return 0;
- }
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
- gf_log ("this->name", GF_LOG_ERROR, "Unable to find hashed_subvol for path"
- " %s", local->pathinfo);
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno,
+ NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, dict);
return 0;
}
int
dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
int ret = 0;
char *value = NULL;
@@ -1497,21 +1982,24 @@ dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
return 0;
}
int
dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
int this_call_cnt = 0;
dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ conf = this->private;
local = frame->local;
this_call_cnt = dht_frame_return (frame);
@@ -1519,8 +2007,8 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!xattr || (op_ret == -1))
goto out;
- if (dict_get (xattr, "trusted.glusterfs.dht")) {
- dict_del (xattr, "trusted.glusterfs.dht");
+ if (dict_get (xattr, conf->xattr_name)) {
+ dict_del (xattr, conf->xattr_name);
}
local->op_ret = 0;
@@ -1537,24 +2025,88 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
out:
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, local->xattr);
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, NULL);
}
return 0;
}
int32_t
dht_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
+dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+
+
+ local = frame->local;
+
+ if (op_ret != -1) {
+ if (local->xattr)
+ dict_unref (local->xattr);
+ local->xattr = dict_ref (xattr);
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+ local->xattr_req = dict_ref (xdata);
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, local->xattr_req);
+ }
+
+ return 0;
+}
+
+
+int
+dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
+
+
+ local = frame->local;
+ layout = local->layout;
+
+ cnt = local->call_cnt = layout->cnt;
+
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
+ }
+
+ return 0;
+}
+
+
+int
dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+ loc_t *loc, const char *key, dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
{
+
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
xlator_t *cached_subvol = NULL;
@@ -1570,7 +2122,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
VALIDATE_OR_GOTO (this->private, err);
conf = this->private;
@@ -1598,24 +2149,67 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) {
- hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (key &&
+ (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
+ && DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
+ return 0;
+ }
+
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
+ if (key && DHT_IS_DIR(layout) &&
+ ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)
+ || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) {
+ (void) strncpy (local->xsel, key, 256);
+ cnt = local->call_cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_vgetxattr_dir_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, NULL);
+ }
+ return 0;
+ }
+
+ /* node-uuid or pathinfo for files */
+ if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)
+ || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) {
cached_subvol = local->cached_subvol;
+ (void) strncpy (local->xsel, key, 256);
local->call_cnt = 1;
- if (hashed_subvol != cached_subvol) {
- local->call_cnt = 2;
- local->hashed_subvol = hashed_subvol;
- }
-
- STACK_WIND (frame, dht_pathinfo_getxattr_cbk, cached_subvol,
- cached_subvol->fops->getxattr, loc, key);
+ STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol->fops->getxattr, loc, key, NULL);
return 0;
}
+
if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "cached subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
if (hashed_subvol == cached_subvol) {
op_errno = ENODATA;
goto err;
@@ -1623,7 +2217,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
if (hashed_subvol) {
STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
hashed_subvol->fops->getxattr, loc,
- GF_XATTR_PATHINFO_KEY);
+ GF_XATTR_PATHINFO_KEY, NULL);
return 0;
}
op_errno = ENODATA;
@@ -1631,13 +2225,13 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
if (key && (!strcmp (GF_XATTR_MARKER_KEY, key))
- && (-1 == frame->root->pid)) {
-
- if (loc->inode-> ia_type == IA_IFDIR) {
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
cnt = layout->cnt;
} else {
cnt = 1;
}
+
sub_volumes = alloca ( cnt * sizeof (xlator_t *));
for (i = 0; i < cnt; i++)
*(sub_volumes + i) = layout->list[i].xlator;
@@ -1645,7 +2239,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
if (cluster_getmarkerattr (frame, this, loc, key,
local, dht_getxattr_unwind,
sub_volumes, cnt,
- MARKER_UUID_TYPE, conf->vol_uuid)) {
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ conf->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
@@ -1655,8 +2250,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
if (key && *conf->vol_uuid) {
if ((match_uuid_local (key, conf->vol_uuid) == 0) &&
- (-1 == frame->root->pid)) {
- if (loc->inode-> ia_type == IA_IFDIR) {
+ (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
cnt = layout->cnt;
} else {
cnt = 1;
@@ -1669,6 +2264,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
local, dht_getxattr_unwind,
sub_volumes, cnt,
MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
conf->vol_uuid)) {
op_errno = EINVAL;
goto err;
@@ -1678,7 +2274,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- if (loc->inode-> ia_type == IA_IFDIR) {
+ if (DHT_IS_DIR(layout)) {
cnt = local->call_cnt = layout->cnt;
} else {
cnt = local->call_cnt = 1;
@@ -1688,29 +2284,100 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
subvol = layout->list[i].xlator;
STACK_WIND (frame, dht_getxattr_cbk,
subvol, subvol->fops->getxattr,
- loc, key);
+ loc, key, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+#undef DHT_IS_DIR
+
+int
+dht_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ if (key) {
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+
+ if ((fd->inode->ia_type == IA_IFDIR)
+ && (strncmp (key, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY) != 0))) {
+ cnt = local->call_cnt = layout->cnt;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->fgetxattr,
+ fd, key, NULL);
+ }
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xattr, int flags)
+ fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
if (!local) {
@@ -1729,13 +2396,13 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
local->call_cnt = 1;
STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
- fd, xattr, flags);
+ fd, xattr, flags, NULL);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -1743,16 +2410,18 @@ err:
static int
dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
int i = -1;
int ret = -1;
@@ -1784,7 +2453,7 @@ dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP);
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
}
return 0;
@@ -1792,7 +2461,7 @@ out:
int
dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags)
+ loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
@@ -1804,17 +2473,19 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
data_t *tmp = NULL;
uint32_t dir_spread = 0;
char value[4096] = {0,};
- int forced_rebalance = 0;
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
int call_cnt = 0;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
conf = this->private;
+
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
+
local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
if (!local) {
op_errno = ENOMEM;
@@ -1839,25 +2510,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
local->call_cnt = call_cnt = layout->cnt;
- /* This key is sent by Unified File and Object storage
- * to test xattr support in backend.
- */
- tmp = dict_get (xattr, "user.ufo-test");
- if (tmp) {
- if (IA_ISREG (loc->inode->ia_type)) {
- op_errno = ENOTSUP;
- goto err;
- }
- local->op_ret = 0;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_ufo_xattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags);
- }
- return 0;
- }
-
tmp = dict_get (xattr, "distribute.migrate-data");
if (tmp) {
if (IA_ISDIR (loc->inode->ia_type)) {
@@ -1869,9 +2521,20 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
(ie, 'target' subvolume given there, etc) */
memcpy (value, tmp->data, tmp->len);
if (strcmp (value, "force") == 0)
- forced_rebalance = 1;
+ forced_rebalance =
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
+ if (!local->rebalance.target_node) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
local->rebalance.from_subvol = local->cached_subvol;
if (local->rebalance.target_node == local->rebalance.from_subvol) {
@@ -1911,7 +2574,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_checking_pathinfo_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->getxattr,
- loc, GF_XATTR_PATHINFO_KEY);
+ loc, GF_XATTR_PATHINFO_KEY, NULL);
}
return 0;
}
@@ -1921,9 +2584,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_INFO,
"fixing the layout of %s", loc->path);
- dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
- layout);
- return 0;
+ ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
tmp = dict_get (xattr, "distribute.directory-spread-count");
@@ -1935,10 +2602,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
(dir_spread > 0))) {
layout->spread_cnt = dir_spread;
- dht_fix_directory_layout (frame,
- dht_common_setxattr_cbk,
- layout);
- return 0;
+ ret = dht_fix_directory_layout (frame,
+ dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
gf_log (this->name, GF_LOG_ERROR,
"wrong 'directory-spread-count' value (%s)", value);
@@ -1950,14 +2621,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_err_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags);
+ loc, xattr, flags, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setxattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -1965,7 +2636,7 @@ err:
int
dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -1991,7 +2662,8 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (removexattr, frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
+ local->op_errno, NULL);
}
return 0;
@@ -2000,21 +2672,27 @@ unlock:
int
dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+ loc_t *loc, const char *key, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
int call_cnt = 0;
+ dht_conf_t *conf = NULL;
int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
if (!local) {
@@ -2045,14 +2723,78 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_removexattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->removexattr,
- loc, key);
+ loc, key, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (removexattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+int
+dht_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+
+ int i;
+
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for inode=%s",
+ uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for inode=%s", uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fremovexattr,
+ fd, key, NULL);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -2060,7 +2802,7 @@ err:
int
dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -2087,7 +2829,7 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
+ local->fd, NULL);
return 0;
}
@@ -2117,7 +2859,7 @@ dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
int
dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -2163,14 +2905,14 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs);
+ &local->statvfs, xdata);
return 0;
}
int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
@@ -2182,7 +2924,6 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
VALIDATE_OR_GOTO (this->private, err);
conf = this->private;
@@ -2199,7 +2940,8 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND (frame, dht_statfs_cbk,
conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc);
+ conf->subvolumes[i]->fops->statfs, loc,
+ xdata);
}
return 0;
}
@@ -2215,20 +2957,21 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_cnt = 1;
STACK_WIND (frame, dht_statfs_cbk,
- subvol, subvol->fops->statfs, loc);
+ subvol, subvol->fops->statfs, loc, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -2255,14 +2998,14 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
STACK_WIND (frame, dht_fd_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2270,7 +3013,7 @@ err:
int
dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries)
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
dht_local_t *local = NULL;
gf_dirent_t entries;
@@ -2283,6 +3026,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dht_layout_t *layout = 0;
dht_conf_t *conf = NULL;
xlator_t *subvol = 0;
+ int ret = 0;
INIT_LIST_HEAD (&entries.list);
prev = cookie;
@@ -2299,10 +3043,13 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
-
- if (check_is_linkfile_wo_dict (NULL, (&orig_entry->d_stat))
- || (check_is_dir (NULL, (&orig_entry->d_stat), NULL)
- && (prev->this != dht_first_up_subvol (this)))) {
+ if (check_is_dir (NULL, (&orig_entry->d_stat), NULL) &&
+ (prev->this != local->first_up_subvol)) {
+ continue;
+ }
+ if (check_is_linkfile (NULL, (&orig_entry->d_stat),
+ orig_entry->dict,
+ conf->link_xattr_name)) {
continue;
}
@@ -2331,6 +3078,24 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
+ if (orig_entry->dict)
+ entry->dict = dict_ref (orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (!IA_ISDIR(entry->d_stat.ia_type)) {
+ ret = dht_layout_preset (this, prev->this,
+ orig_entry->inode);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to link the layout in inode");
+ entry->inode = inode_ref (orig_entry->inode);
+ } else if (orig_entry->inode) {
+ dht_inode_ctx_time_update (orig_entry->inode, this,
+ &entry->d_stat, 1);
+ }
+
list_add_tail (&entry->list, &entries.list);
count++;
}
@@ -2360,9 +3125,23 @@ done:
goto unwind;
}
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
+
STACK_WIND (frame, dht_readdirp_cbk,
next_subvol, next_subvol->fops->readdirp,
- local->fd, local->size, next_offset);
+ local->fd, local->size, next_offset,
+ local->xattr);
return 0;
}
@@ -2370,7 +3149,7 @@ unwind:
if (op_ret < 0)
op_ret = 0;
- DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
gf_dirent_free (&entries);
@@ -2381,7 +3160,8 @@ unwind:
int
dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
+ int op_ret, int op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
gf_dirent_t entries;
@@ -2458,7 +3238,7 @@ done:
STACK_WIND (frame, dht_readdir_cbk,
next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset);
+ local->fd, local->size, next_offset, NULL);
return 0;
}
@@ -2466,7 +3246,7 @@ unwind:
if (op_ret < 0)
op_ret = 0;
- DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
gf_dirent_free (&entries);
@@ -2476,17 +3256,21 @@ unwind:
int
dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop)
+ off_t yoff, int whichop, dict_t *dict)
{
dht_local_t *local = NULL;
int op_errno = -1;
xlator_t *xvol = NULL;
off_t xoff = 0;
-
+ int ret = 0;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
local = dht_local_init (frame, NULL, NULL, whichop);
if (!local) {
@@ -2496,22 +3280,52 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->fd = fd_ref (fd);
local->size = size;
+ local->xattr_req = (dict)? dict_ref (dict) : NULL;
+ local->first_up_subvol = dht_first_up_subvol (this);
dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
/* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIR)
- STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, xoff);
- else
+ if (whichop == GF_FOP_READDIRP) {
+ if (dict)
+ local->xattr = dict_ref (dict);
+ else
+ local->xattr = dict_new ();
+
+ if (local->xattr) {
+ ret = dict_set_uint32 (local->xattr,
+ conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set '%s' key",
+ conf->link_xattr_name);
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "Dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
+ }
+
STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, xoff);
+ fd, size, xoff, local->xattr);
+ } else {
+ STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
+ fd, size, xoff, local->xattr);
+ }
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2519,7 +3333,7 @@ err:
int
dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
+ off_t yoff, dict_t *xdata)
{
int op = GF_FOP_READDIR;
dht_conf_t *conf = NULL;
@@ -2540,15 +3354,15 @@ dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
op = GF_FOP_READDIRP;
out:
- dht_do_readdir (frame, this, fd, size, yoff, op);
+ dht_do_readdir (frame, this, fd, size, yoff, op, 0);
return 0;
}
int
dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
+ off_t yoff, dict_t *dict)
{
- dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP);
+ dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
return 0;
}
@@ -2556,7 +3370,7 @@ dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int
dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -2576,14 +3390,16 @@ dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
return 0;
}
int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int datasync, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -2610,14 +3426,14 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
STACK_WIND (frame, dht_fsyncdir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
@@ -2627,9 +3443,9 @@ int
dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
+ xlator_t *prev = NULL;
int ret = -1;
dht_local_t *local = NULL;
@@ -2647,19 +3463,24 @@ dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
}
- ret = dht_layout_preset (this, prev->this, inode);
+ ret = dht_layout_preset (this, prev, inode);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"could not set pre-set layout for subvolume %s",
- prev->this->name);
+ prev? prev->name: NULL);
op_ret = -1;
op_errno = EINVAL;
goto out;
}
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
out:
/*
* FIXME: ia_size and st_blocks of preparent and postparent do not have
@@ -2668,10 +3489,9 @@ out:
* corresponding values from each of the subvolume.
* See dht_iatt_merge for reference.
*/
-
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
return 0;
}
@@ -2680,7 +3500,8 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -2689,22 +3510,28 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
goto err;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
cached_subvol = local->cached_subvol;
- STACK_WIND (frame, dht_newfile_cbk,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev,
- local->params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask,
+ local->params);
return 0;
err:
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
int
dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -2736,11 +3563,13 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
+ subvol, subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
} else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
+
+ avail_subvol = dht_free_disk_available_subvol (this, subvol,
+ local);
if (avail_subvol != subvol) {
/* Choose the minimum filled volume, and create the
files there */
@@ -2749,17 +3578,18 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
local->cached_subvol = avail_subvol;
local->mode = mode;
local->rdev = rdev;
-
+ local->umask = umask;
dht_linkfile_create (frame,
dht_mknod_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ this, avail_subvol, subvol, loc);
} else {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
}
}
@@ -2768,7 +3598,7 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -2776,7 +3606,7 @@ err:
int
dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc, dict_t *params)
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -2804,23 +3634,24 @@ dht_symlink (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->symlink,
- linkname, loc, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
xlator_t *cached_subvol = NULL;
xlator_t *hashed_subvol = NULL;
@@ -2838,7 +3669,7 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->loc.path, cached_subvol->name, loc->path);
STACK_WIND (frame, dht_unlink_cbk,
cached_subvol, cached_subvol->fops->unlink,
- &local->loc);
+ &local->loc, xflag, xdata);
goto done;
}
@@ -2866,18 +3697,21 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto err;
}
+ local->flags = xflag;
if (hashed_subvol != cached_subvol) {
STACK_WIND (frame, dht_unlink_linkfile_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc);
+ hashed_subvol, hashed_subvol->fops->unlink, loc,
+ xflag, xdata);
} else {
STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc);
+ cached_subvol, cached_subvol->fops->unlink, loc,
+ xflag, xdata);
}
done:
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -2887,13 +3721,16 @@ int
dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
call_frame_t *prev = NULL;
dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
prev = cookie;
+ local = frame->local;
+
if (op_ret == -1)
goto out;
@@ -2907,13 +3744,20 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- WIPE (preparent);
- WIPE (postparent);
-
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -2923,7 +3767,8 @@ int
dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *srcvol = NULL;
@@ -2935,14 +3780,14 @@ dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
srcvol = local->linkfile.srcvol;
STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, xdata);
return 0;
err:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -2950,7 +3795,7 @@ err:
int
dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_t *cached_subvol = NULL;
xlator_t *hashed_subvol = NULL;
@@ -2995,19 +3840,19 @@ dht_link (call_frame_t *frame, xlator_t *this,
if (hashed_subvol != cached_subvol) {
uuid_copy (local->gfid, oldloc->inode->gfid);
- dht_linkfile_create (frame, dht_link_linkfile_cbk,
+ dht_linkfile_create (frame, dht_link_linkfile_cbk, this,
cached_subvol, hashed_subvol, newloc);
} else {
STACK_WIND (frame, dht_link_cbk,
cached_subvol, cached_subvol->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -3017,7 +3862,7 @@ int
dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
call_frame_t *prev = NULL;
int ret = -1;
@@ -3036,8 +3881,11 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
}
ret = dht_layout_preset (this, prev->this, inode);
@@ -3049,11 +3897,14 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
goto out;
}
-
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -3063,7 +3914,8 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -3077,18 +3929,19 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
cached_subvol, cached_subvol->fops->create,
&local->loc, local->flags, local->mode,
- local->fd, local->params);
+ local->umask, local->fd, local->params);
return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
int
dht_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
int op_errno = -1;
xlator_t *subvol = NULL;
@@ -3114,7 +3967,7 @@ dht_create (call_frame_t *frame, xlator_t *this,
local->loc.path, subvol->name, loc->path);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- &local->loc, flags, mode, fd, params);
+ &local->loc, flags, mode, umask, fd, params);
goto done;
}
@@ -3132,38 +3985,38 @@ dht_create (call_frame_t *frame, xlator_t *this,
"creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
goto done;
}
/* Choose the minimum filled volume, and create the
files there */
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
if (avail_subvol != subvol) {
local->params = dict_ref (params);
local->flags = flags;
local->mode = mode;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
local->hashed_subvol = subvol;
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s (link at %s)", loc->path,
avail_subvol->name, subvol->name);
- dht_linkfile_create (frame,
- dht_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
goto done;
}
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
done:
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
@@ -3172,7 +4025,7 @@ err:
int
dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -3183,14 +4036,17 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
if (op_ret == 0) {
dht_layout_set (this, local->inode, layout);
if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
}
}
DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
local->inode, &local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
return 0;
}
@@ -3198,7 +4054,7 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
int
dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -3219,6 +4075,15 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ret = dht_layout_merge (this, layout, prev->this,
-1, ENOSPC, NULL);
} else {
+ if (op_ret == -1 && op_errno == EEXIST)
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
}
@@ -3251,7 +4116,8 @@ int
dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int ret = -1;
@@ -3308,19 +4174,20 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
continue;
STACK_WIND (frame, dht_mkdir_cbk,
conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir,
- &local->loc, local->mode, local->params);
+ conf->subvolumes[i]->fops->mkdir, &local->loc,
+ local->mode, local->umask, local->params);
}
return 0;
err:
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
-int
+ int
dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -3356,6 +4223,7 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
local->hashed_subvol = hashed_subvol;
local->mode = mode;
+ local->umask = umask;
local->params = dict_ref (params);
local->inode = inode_ref (loc->inode);
@@ -3368,13 +4236,14 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_mkdir_hashed_cbk,
hashed_subvol,
hashed_subvol->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -3382,14 +4251,87 @@ err:
int
dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
+
+ return 0;
+}
+
+
+int
+dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (op_errno != ENOENT && op_errno != EACCES) {
+ local->need_selfheal = 1;
+ }
+
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rmdir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->need_selfheal) {
+ local->layout =
+ dht_layout_get (this, local->loc.inode);
+
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+
+ uuid_copy (local->gfid, local->loc.inode->gfid);
+ dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+ &local->loc, local->layout);
+ } else {
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+ }
+
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
return 0;
}
@@ -3398,11 +4340,12 @@ dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
+ int done = 0;
local = frame->local;
prev = cookie;
@@ -3435,7 +4378,16 @@ unlock:
this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
+
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
+
+
+ if (done) {
if (local->need_selfheal && local->fop_succeeded) {
local->layout =
dht_layout_get (this, local->loc.inode);
@@ -3446,15 +4398,34 @@ unlock:
uuid_copy (local->gfid, local->loc.inode->gfid);
dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
&local->loc, local->layout);
- } else {
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+
+ local->need_selfheal = 0;
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
+
if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+
}
DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
local->op_errno, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
}
}
@@ -3468,6 +4439,7 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this)
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int i = 0;
+ xlator_t *hashed_subvol = NULL;
VALIDATE_OR_GOTO (this->private, err);
@@ -3479,18 +4451,41 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this)
local->call_cnt = conf->subvolume_cnt;
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to get hashed "
+ "subvol for %s",local->loc.path);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
+
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ return 0;
+ }
+
for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (hashed_subvol &&
+ (hashed_subvol == conf->subvolumes[i]))
+ continue;
+
STACK_WIND (frame, dht_rmdir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->rmdir,
- &local->loc, local->flags);
+ &local->loc, local->flags, NULL);
}
return 0;
err:
DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -3498,7 +4493,7 @@ err:
int
dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -3546,6 +4541,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_frame_t *main_frame = NULL;
dht_local_t *main_local = NULL;
int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
local = frame->local;
prev = cookie;
@@ -3557,7 +4553,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != 0)
goto err;
- if (check_is_linkfile (inode, stbuf, xattr) == 0) {
+ if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
main_local->op_ret = -1;
main_local->op_errno = ENOTEMPTY;
@@ -3568,7 +4564,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
- src, src->fops->unlink, &local->loc);
+ src, src->fops->unlink, &local->loc, 0, NULL);
return 0;
err:
@@ -3591,6 +4587,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
call_frame_t *lookup_frame = NULL;
dht_local_t *lookup_local = NULL;
dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
local = frame->local;
@@ -3599,7 +4597,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
continue;
if (strcmp (trav->d_name, "..") == 0)
continue;
- if (check_is_linkfile (NULL, (&trav->d_stat), NULL) == 1) {
+ if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
ret++;
continue;
}
@@ -3611,6 +4610,21 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
return 0;
}
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ return -1;
+ }
+
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key"
+ " in dict");
+ if (xattrs)
+ dict_unref (xattrs);
+ return -1;
+ }
+
list_for_each_entry (trav, &entries->list, list) {
if (strcmp (trav->d_name, ".") == 0)
continue;
@@ -3627,8 +4641,7 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
goto err;
}
- lookup_local = GF_CALLOC (sizeof (*local), 1,
- gf_dht_mt_dht_local_t);
+ lookup_local = mem_get0 (this->local_pool);
if (!lookup_local) {
goto err;
}
@@ -3655,12 +4668,18 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
src, src->fops->lookup,
- &lookup_local->loc, NULL);
+ &lookup_local->loc, xattrs);
ret++;
}
+ if (xattrs)
+ dict_unref (xattrs);
+
return ret;
err:
+ if (xattrs)
+ dict_unref (xattrs);
+
DHT_STACK_DESTROY (lookup_frame);
return 0;
}
@@ -3668,7 +4687,8 @@ err:
int
dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -3712,12 +4732,14 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
call_frame_t *prev = NULL;
-
+ dict_t *dict = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
local = frame->local;
prev = cookie;
@@ -3727,14 +4749,32 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"opendir on %s for %s failed (%s)",
prev->this->name, local->loc.path,
strerror (op_errno));
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ goto err;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
local->op_ret = -1;
- local->op_errno = op_errno;
+ local->op_errno = ENOMEM;
goto err;
}
+ ret = dict_set_uint32 (dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ local->loc.path, conf->link_xattr_name);
+
STACK_WIND (frame, dht_rmdir_readdirp_cbk,
prev->this, prev->this->fops->readdirp,
- local->fd, 4096, 0);
+ local->fd, 4096, 0, dict);
+
+ if (dict)
+ dict_unref (dict);
return 0;
@@ -3750,7 +4790,8 @@ err:
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -3789,7 +4830,7 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
STACK_WIND (frame, dht_rmdir_opendir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- loc, local->fd);
+ loc, local->fd, NULL);
}
return 0;
@@ -3797,17 +4838,17 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int
dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -3815,7 +4856,7 @@ dht_entrylk_cbk (call_frame_t *frame, void *cookie,
int
dht_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -3825,7 +4866,6 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
if (!local) {
@@ -3845,13 +4885,13 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_entrylk_cbk,
subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (entrylk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -3859,10 +4899,10 @@ err:
int
dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL);
return 0;
}
@@ -3870,7 +4910,7 @@ dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
int
dht_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -3889,13 +4929,13 @@ dht_fentrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_fentrylk_cbk,
subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -3904,16 +4944,21 @@ err:
int
dht_forget (xlator_t *this, inode_t *inode)
{
- uint64_t tmp_layout = 0;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
dht_layout_t *layout = NULL;
- inode_ctx_del (inode, this, &tmp_layout);
+ inode_ctx_del (inode, this, &ctx_int);
- if (!tmp_layout)
+ if (!ctx_int)
return 0;
- layout = (dht_layout_t *)(long)tmp_layout;
+ ctx = (dht_inode_ctx_t *) (long) ctx_int;
+
+ layout = ctx->layout;
+ ctx->layout = NULL;
dht_layout_unref (this, layout);
+ GF_FREE (ctx);
return 0;
}
@@ -3922,16 +4967,22 @@ dht_forget (xlator_t *this, inode_t *inode)
int
dht_notify (xlator_t *this, int event, void *data, ...)
{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int propagate = 0;
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ struct timeval time = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- struct timeval time = {0,};
conf = this->private;
if (!conf)
@@ -3993,7 +5044,11 @@ dht_notify (xlator_t *this, int event, void *data, ...)
if (conf->assert_no_child_down) {
gf_log (this->name, GF_LOG_WARNING,
"Received CHILD_DOWN. Exiting");
- exit(0);
+ if (conf->defrag) {
+ gf_defrag_stop (conf->defrag, NULL);
+ } else {
+ kill (getpid(), SIGTERM);
+ }
}
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -4044,6 +5099,36 @@ dht_notify (xlator_t *this, int event, void *data, ...)
UNLOCK (&conf->subvolume_lock);
break;
+ case GF_EVENT_VOLUME_DEFRAG:
+ {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+
+ ret = dict_get_int32 (dict, "rebalance-command",
+ (int32_t*)&cmd);
+ if (ret)
+ return ret;
+ LOCK (&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if (cmd == GF_DEFRAG_CMD_STATUS)
+ gf_defrag_status_get (defrag, output);
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ gf_defrag_stop (defrag, output);
+ }
+unlock:
+ UNLOCK (&defrag->lock);
+ return 0;
+ break;
+ }
+
default:
propagate = 1;
break;
@@ -4059,9 +5144,12 @@ dht_notify (xlator_t *this, int event, void *data, ...)
/* if all subvols have reported status, no need to hide anything
or wait for anything else. Just propagate blindly */
- if (have_heard_from_all)
+ if (have_heard_from_all) {
propagate = 1;
+ }
+
+
if (!had_heard_from_all && have_heard_from_all) {
/* This is the first event which completes aggregation
of events from all subvolumes. If at least one subvol
@@ -4080,6 +5168,19 @@ dht_notify (xlator_t *this, int event, void *data, ...)
/* continue to check other events for CHILD_UP */
}
}
+
+ /* rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ */
+ if (conf->defrag) {
+ ret = gf_thread_create (&conf->defrag->th, NULL,
+ gf_defrag_start, this);
+ if (ret) {
+ conf->defrag = NULL;
+ GF_FREE (conf->defrag);
+ kill (getpid(), SIGTERM);
+ }
+ }
}
ret = 0;
@@ -4088,3 +5189,24 @@ dht_notify (xlator_t *this, int event, void *data, ...)
return ret;
}
+
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 791cf6335..5ccd66799 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -22,6 +13,8 @@
#include "config.h"
#endif
+#include <regex.h>
+
#include "dht-mem-types.h"
#include "libxlator.h"
#include "syncop.h"
@@ -38,7 +31,8 @@
typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
int ret);
@@ -61,20 +55,38 @@ struct dht_layout {
uint32_t start;
uint32_t stop;
xlator_t *xlator;
- } list[0];
+ } list[];
};
typedef struct dht_layout dht_layout_t;
+struct dht_stat_time {
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
+};
+
+typedef struct dht_stat_time dht_stat_time_t;
+
+struct dht_inode_ctx {
+ dht_layout_t *layout;
+ dht_stat_time_t time;
+};
+
+typedef struct dht_inode_ctx dht_inode_ctx_t;
+
typedef enum {
DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
} dht_hashfn_type_t;
/* rebalance related */
struct dht_rebalance_ {
xlator_t *from_subvol;
xlator_t *target_node;
- int32_t wbflags;
off_t offset;
size_t size;
int32_t flags;
@@ -83,6 +95,7 @@ struct dht_rebalance_ {
struct iovec *vector;
struct iatt stbuf;
dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
};
struct dht_local {
@@ -129,7 +142,6 @@ struct dht_local {
struct {
uint32_t hole_cnt;
uint32_t overlaps_cnt;
- uint32_t missing;
uint32_t down;
uint32_t misc;
dht_selfheal_dir_cbk_t dir_cbk;
@@ -142,11 +154,16 @@ struct dht_local {
int32_t flags;
mode_t mode;
dev_t rdev;
+ mode_t umask;
/* need for file-info */
- char *pathinfo;
+ char *xattr_val;
char *key;
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
char *newpath;
/* gfid related */
@@ -162,7 +179,12 @@ struct dht_local {
glusterfs_fop_t fop;
+ gf_boolean_t linked;
+ xlator_t *link_subvol;
+
struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
+
};
typedef struct dht_local dht_local_t;
@@ -175,6 +197,59 @@ struct dht_du {
};
typedef struct dht_du dht_du_t;
+enum gf_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+};
+typedef enum gf_defrag_type gf_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
+
+struct gf_defrag_pattern_list {
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ struct timeval start_time;
+ gf_boolean_t stats;
+ gf_defrag_pattern_list_t *defrag_pattern;
+};
+
+typedef struct gf_defrag_info_ gf_defrag_info_t;
+
struct dht_conf {
gf_lock_t subvolume_lock;
int subvolume_cnt;
@@ -186,8 +261,8 @@ struct dht_conf {
gf_boolean_t search_unhashed;
int gen;
dht_du_t *du_stats;
- uint64_t min_free_disk;
- uint32_t min_free_inodes;
+ double min_free_disk;
+ double min_free_inodes;
char disk_unit;
int32_t refresh_interval;
gf_boolean_t unhashed_sticky_bit;
@@ -204,10 +279,28 @@ struct dht_conf {
/* Will be a global flag to control the layout spread count */
uint32_t dir_spread_cnt;
- struct syncenv *env; /* The env pointer to the rebalance synctask */
-
/* to keep track of nodes which are decomissioned */
xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Request to filter directory entries in readdir request */
+
+ gf_boolean_t readdir_optimize;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ gf_boolean_t rsync_regex_valid;
+ regex_t extra_regex;
+ gf_boolean_t extra_regex_valid;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *link_xattr_name;
+ char *wild_xattr_name;
};
typedef struct dht_conf dht_conf_t;
@@ -222,29 +315,28 @@ struct dht_disk_layout {
};
typedef struct dht_disk_layout dht_disk_layout_t;
+typedef enum {
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+} gf_dht_migrate_data_type_t;
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_fs_root(loc) (strcmp (loc->path, "/") == 0)
-
-#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
#define DHT_MIGRATION_IN_PROGRESS 1
#define DHT_MIGRATION_COMPLETED 2
-#define DHT_LINKFILE_KEY "trusted.glusterfs.dht.linkto"
#define DHT_LINKFILE_MODE (S_ISVTX)
-#define check_is_linkfile(i,s,x) ( \
+#define check_is_linkfile(i,s,x,n) ( \
((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
- == DHT_LINKFILE_MODE) && \
- dict_get (x, DHT_LINKFILE_KEY))
-
-#define check_is_linkfile_wo_dict(i,s) ( \
- ((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
- == DHT_LINKFILE_MODE))
+ == DHT_LINKFILE_MODE) && \
+ dict_get (x, n))
#define IS_DHT_MIGRATION_PHASE2(buf) ( \
IA_ISREG ((buf)->ia_type) && \
@@ -289,6 +381,25 @@ typedef struct dht_disk_layout dht_disk_layout_t;
dht_local_wipe (__xl, __local); \
} while (0)
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) do {\
+ int32_t sec = 0; \
+ sec = new_sec; \
+ LOCK (&inode->lock); \
+ { \
+ new_sec = max(new_sec, ctx_sec); \
+ if (sec < new_sec) \
+ new_nsec = ctx_nsec; \
+ if (sec == new_sec) \
+ new_nsec = max (new_nsec, ctx_nsec); \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+#define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
@@ -298,7 +409,7 @@ int dht_layout_normalize (xlator_t *this, l
int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t *holes_p, uint32_t *overlaps_p,
uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
+ uint32_t *misc_p, uint32_t *no_space_p);
int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
xlator_t *subvol, loc_t *loc, dict_t *xattr);
@@ -332,12 +443,14 @@ int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt
xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
+xlator_t *dht_subvol_next_available (xlator_t *this, xlator_t *prev);
int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p);
int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
+ xlator_t *this, xlator_t *tovol,
+ xlator_t *fromvol, loc_t *loc);
int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
int
@@ -352,17 +465,15 @@ dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
int
dht_layout_sort_volname (dht_layout_t *layout);
-int dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
-
int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *layout);
int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
-int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);
+int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);;
void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
xlator_t *dht_first_up_subvol (xlator_t *this);
@@ -377,7 +488,8 @@ int dht_rename_cleanup (call_frame_t *frame)
int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent);
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
int dht_fix_directory_layout (call_frame_t *frame,
dht_selfheal_dir_cbk_t dir_cbk,
@@ -400,73 +512,73 @@ int32_t dht_lookup (call_frame_t *frame,
int32_t dht_stat (call_frame_t *frame,
xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t dht_fstat (call_frame_t *frame,
xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t dht_truncate (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- off_t offset);
+ off_t offset, dict_t *xdata);
int32_t dht_ftruncate (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- off_t offset);
+ off_t offset, dict_t *xdata);
int32_t dht_access (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t mask);
+ int32_t mask, dict_t *xdata);
int32_t dht_readlink (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- size_t size);
+ size_t size, dict_t *xdata);
-int32_t dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params);
+int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t dht_unlink (call_frame_t *frame,
xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params);
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
int32_t dht_rename (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc);
+ loc_t *newloc, dict_t *xdata);
int32_t dht_link (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc);
+ loc_t *newloc, dict_t *xdata);
int32_t dht_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params);
+ mode_t umask, fd_t *fd, dict_t *params);
int32_t dht_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd,
- int32_t wbflags);
+ int32_t flags, fd_t *fd, dict_t *xdata);
int32_t dht_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset);
+ off_t offset, uint32_t flags, dict_t *xdata);
int32_t dht_writev (call_frame_t *frame,
xlator_t *this,
@@ -474,107 +586,121 @@ int32_t dht_writev (call_frame_t *frame,
struct iovec *vector,
int32_t count,
off_t offset,
- struct iobref *iobref);
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
int32_t dht_flush (call_frame_t *frame,
xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t dht_fsync (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t datasync);
+ int32_t datasync, dict_t *xdata);
int32_t dht_opendir (call_frame_t *frame,
xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t dht_fsyncdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t datasync);
+ int32_t datasync, dict_t *xdata);
int32_t dht_statfs (call_frame_t *frame,
xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t dht_setxattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t dht_getxattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name);
+ const char *name, dict_t *xdata);
int32_t dht_fsetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t dht_fgetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- const char *name);
+ const char *name, dict_t *xdata);
int32_t dht_removexattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name);
+ const char *name, dict_t *xdata);
+int32_t dht_fremovexattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
int32_t dht_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct gf_flock *flock);
+ struct gf_flock *flock, dict_t *xdata);
int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock);
+ struct gf_flock *flock, dict_t *xdata);
int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock);
+ struct gf_flock *flock, dict_t *xdata);
int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int32_t dht_readdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- size_t size, off_t off);
+ size_t size, off_t off, dict_t *xdata);
int32_t dht_readdirp (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- size_t size, off_t off);
+ size_t size, off_t off, dict_t *dict);
int32_t dht_xattrop (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
gf_xattrop_flags_t flags,
- dict_t *dict);
+ dict_t *dict, dict_t *xdata);
int32_t dht_fxattrop (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
gf_xattrop_flags_t flags,
- dict_t *dict);
+ dict_t *dict, dict_t *xdata);
int32_t dht_forget (xlator_t *this, inode_t *inode);
int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid);
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid);
-
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+int32_t dht_init (xlator_t *this);
+void dht_fini (xlator_t *this);
+int dht_reconfigure (xlator_t *this, dict_t *options);
int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
/* definitions for nufa/switch */
@@ -597,12 +723,65 @@ int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent);
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent);
+ struct iatt *postparent, dict_t *xdata);
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
+void*
+gf_defrag_start (void *this);
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf);
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag);
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t* layout_int);
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
+
+int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
+int
+dht_dir_attr_heal (void *data);
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data);
+int
+dht_dir_has_layout (dict_t *xattr, char *name);
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this);
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix);
+int32_t
+dht_priv_dump (xlator_t *this);
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol);
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 5453e3b10..fe3955ecb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -35,7 +26,8 @@
int
dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs,
+ dict_t *xdata)
{
dht_conf_t *conf = NULL;
call_frame_t *prev = NULL;
@@ -105,6 +97,7 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
call_pool_t *pool = NULL;
+ loc_t tmp_loc = {0,};
conf = this->private;
pool = this->ctx->pool;
@@ -121,15 +114,14 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
goto err;
}
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
statfs_local->call_cnt = 1;
STACK_WIND (statfs_frame, dht_du_info_cbk,
conf->subvolumes[subvol_idx],
conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc);
+ &tmp_loc, NULL);
return 0;
err:
@@ -142,15 +134,21 @@ err:
int
dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
+ int i = 0;
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
+ struct timeval tv = {0,};
+ loc_t tmp_loc = {0,};
conf = this->private;
gettimeofday (&tv, NULL);
+
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
+
if (tv.tv_sec > (conf->refresh_interval
+ conf->last_stat_fetch.tv_sec)) {
@@ -166,16 +164,12 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto err;
}
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
statfs_local->call_cnt = conf->subvolume_cnt;
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND (statfs_frame, dht_du_info_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->statfs,
- &tmp_loc);
+ &tmp_loc, NULL);
}
conf->last_stat_fetch.tv_sec = tv.tv_sec;
@@ -254,50 +248,167 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
return is_subvol_filled;
}
+
+/*Get the best subvolume to create the file in*/
xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
{
- int i = 0;
- double max = 0;
- double max_inodes = 0;
xlator_t *avail_subvol = NULL;
dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
conf = this->private;
-
- LOCK (&conf->subvolume_lock);
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get (this, loc->parent);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout missing path=%s parent=%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
+ goto out;
+ }
+ } else {
+ layout = dht_layout_ref (this, local->layout);
+ }
+
+ LOCK (&conf->subvolume_lock);
{
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if ((conf->du_stats[i].avail_percent > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_percent;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if ((conf->du_stats[i].avail_space > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_space;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ layout);
+ if(!avail_subvol)
+ {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
+ subvol,
+ layout);
+ }
- }
- }
}
UNLOCK (&conf->subvolume_lock);
-
+out:
if (!avail_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space and inodes to create");
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "no subvolume has enough free space and/or inodes\
+ to create");
+ avail_subvol = subvol;
}
- if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes))
- avail_subvol = subvol;
+ if (layout)
+ dht_layout_unref (this, layout);
+ return avail_subvol;
+}
- if (!avail_subvol)
- avail_subvol = subvol;
+static inline
+int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!this || !layout)
+ goto out;
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
- return avail_subvol;
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for(i=0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ return avail_subvol;
+}
+
+
+/* Get subvol which has atleast one inode and maximum space */
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max)
+ && (conf->du_stats[i].avail_inodes > 0 )) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
+ }
+ } else {
+ if ((conf->du_stats[i].avail_space > max)
+ && (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index c8ae74172..656cf23a0 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -37,6 +28,7 @@ dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
switch (type) {
case DHT_HASH_TYPE_DM:
+ case DHT_HASH_TYPE_DM_USER:
hash = gf_dm_hashfn (name, strlen (name));
break;
default:
@@ -52,30 +44,68 @@ dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
}
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
- rsync_frndly_name = (char *) name; \
- if (name[0] == '.') { \
- char *dot = 0; \
- int namelen = 0; \
- \
- dot = strrchr (name, '.'); \
- if (dot && dot > (name + 1) && *(dot + 1)) { \
- namelen = (dot - name); \
- rsync_frndly_name = alloca (namelen); \
- strncpy (rsync_frndly_name, name + 1, \
- namelen); \
- rsync_frndly_name[namelen - 1] = 0; \
- } \
- } \
- } while (0);
+static inline
+gf_boolean_t
+dht_munge_name (const char *original, char *modified, size_t len, regex_t *re)
+{
+ regmatch_t matches[2];
+ size_t new_len;
+
+ if (regexec(re,original,2,matches,0) != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy (modified,original+matches[1].rm_so,
+ new_len);
+ modified[new_len] = '\0';
+ return _gf_true;
+ }
+ }
+ }
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified,original);
+ return _gf_false;
+}
int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = this->private;
+ size_t len = 0;
+ gf_boolean_t munged = _gf_false;
+
+ /*
+ * It wouldn't be safe to use alloca in an inline function that doesn't
+ * actually get inlined, and it wouldn't be efficient to do a real
+ * allocation, so we use alloca here (if needed) and pass that to the
+ * inline.
+ */
+
+ if (priv->extra_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->extra_regex);
+ }
+
+ if (!munged && priv->rsync_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ gf_log (this->name, GF_LOG_TRACE, "trying regex for %s", name);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->rsync_regex);
+ if (munged) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "munged down to %s", rsync_friendly_name);
+ }
+ }
- MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
+ if (!munged) {
+ rsync_friendly_name = (char *)name;
+ }
return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 01d11ee68..311a48112 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,6 +18,28 @@
#include "xlator.h"
#include "dht-common.h"
+static inline int
+dht_inode_ctx_set1 (xlator_t *this, inode_t *inode, xlator_t *subvol)
+{
+ uint64_t tmp_subvol = 0;
+
+ tmp_subvol = (long)subvol;
+ return inode_ctx_set1 (inode, this, &tmp_subvol);
+}
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol)
+{
+ int ret = -1;
+ uint64_t tmp_subvol = 0;
+
+ ret = inode_ctx_get1 (inode, this, &tmp_subvol);
+ if (tmp_subvol && subvol)
+ *subvol = (xlator_t *)tmp_subvol;
+
+ return ret;
+}
+
int
dht_frame_return (call_frame_t *frame)
@@ -49,6 +62,43 @@ dht_frame_return (call_frame_t *frame)
}
+static uint64_t
+dht_bits_for (uint64_t num)
+{
+ uint64_t bits = 0, ctrl = 1;
+
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits ++;
+ }
+
+ return bits;
+}
+
+/*
+ * A slightly "updated" version of the algorithm described in the commit log
+ * is used here.
+ *
+ * The only enhancement is that:
+ *
+ * - The number of bits used by the backend filesystem for HUGE d_off which
+ * is described as 63, and
+ * - The number of bits used by the d_off presented by the transformation
+ * upwards which is described as 64, are both made "configurable."
+ */
+
+
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
+
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
+
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
+
int
dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
{
@@ -56,6 +106,9 @@ dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
int cnt = 0;
int max = 0;
uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
if (x == ((uint64_t) -1)) {
y = (uint64_t) -1;
@@ -69,7 +122,23 @@ dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
max = conf->subvolume_cnt;
cnt = dht_subvol_cnt (this, subvol);
- y = ((x * max) + cnt);
+ if (max == 1) {
+ y = x;
+ goto out;
+ }
+
+ max_bits = dht_bits_for (max);
+
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
+
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
+ } else {
+ /* small d_off */
+ y = ((x * max) + cnt);
+ }
out:
if (y_p)
@@ -89,7 +158,7 @@ dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
int ret = 0; /* not found */
/* Why do other tasks if first required 'char' itself is not there */
- if (loc->name && !strchr (loc->name, '@'))
+ if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@'))
goto out;
trav = this->children;
@@ -129,10 +198,8 @@ dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
out:
if (!ret) {
/* !success */
- if (new_path)
- GF_FREE (new_path);
- if (new_name)
- GF_FREE (new_name);
+ GF_FREE (new_path);
+ GF_FREE (new_name);
}
return ret;
}
@@ -146,16 +213,38 @@ dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
int max = 0;
uint64_t x = 0;
xlator_t *subvol = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
if (!this->private)
- goto out;
+ return -1;
conf = this->private;
max = conf->subvolume_cnt;
- cnt = y % max;
- x = y / max;
+ if (max == 1) {
+ x = y;
+ cnt = 0;
+ goto out;
+ }
+
+ if (y & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = dht_bits_for (max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
+
+ x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
+
+ cnt = y & host_mask;
+ } else {
+ /* small d_off */
+ cnt = y % max;
+ x = y / max;
+ }
+out:
subvol = conf->subvolumes[cnt];
if (subvol_p)
@@ -164,7 +253,6 @@ dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
if (x_p)
*x_p = x;
-out:
return 0;
}
@@ -215,21 +303,16 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
local->selfheal.layout = NULL;
}
- if (local->newpath) {
- GF_FREE (local->newpath);
- }
+ GF_FREE (local->newpath);
- if (local->key) {
- GF_FREE (local->key);
- }
+ GF_FREE (local->key);
- if (local->rebalance.vector)
- GF_FREE (local->rebalance.vector);
+ GF_FREE (local->rebalance.vector);
if (local->rebalance.iobref)
iobref_unref (local->rebalance.iobref);
- GF_FREE (local);
+ mem_put (local);
}
@@ -240,8 +323,7 @@ dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
inode_t *inode = NULL;
int ret = 0;
- /* TODO: use mem-pool */
- local = GF_CALLOC (1, sizeof (*local), gf_dht_mt_dht_local_t);
+ local = mem_get0 (THIS->local_pool);
if (!local)
goto out;
@@ -274,26 +356,12 @@ dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
out:
if (ret) {
if (local)
- GF_FREE (local);
+ mem_put (local);
local = NULL;
}
return local;
}
-
-char *
-basestr (const char *str)
-{
- char *basestr = NULL;
-
- basestr = strrchr (str, '/');
- if (basestr)
- basestr ++;
-
- return basestr;
-}
-
-
xlator_t *
dht_first_up_subvol (xlator_t *this)
{
@@ -358,11 +426,17 @@ dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
- if (is_fs_root (loc)) {
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+ if (__is_root_gfid (loc->gfid)) {
subvol = dht_first_up_subvol (this);
goto out;
}
+ GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+
layout = dht_layout_get (this, loc->parent);
if (!layout) {
@@ -396,6 +470,8 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
layout = dht_layout_get (this, inode);
@@ -437,7 +513,36 @@ out:
return next;
}
+/* This func wraps around, if prev is actually the last subvol.
+ */
+xlator_t *
+dht_subvol_next_available (xlator_t *this, xlator_t *prev)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ /* if prev is last in conf->subvolumes, then wrap
+ * around.
+ */
+ if ((i + 1) < conf->subvolume_cnt) {
+ next = conf->subvolumes[i + 1];
+ } else {
+ next = conf->subvolumes[0];
+ }
+ break;
+ }
+ }
+
+out:
+ return next;
+}
int
dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
{
@@ -466,6 +571,15 @@ out:
(a) = (b); \
} while (0)
+
+#define set_if_greater_time(a, an, b, bn) do { \
+ if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))){ \
+ (a) = (b); \
+ (an) = (bn); \
+ } \
+ } while (0) \
+
+
int
dht_iatt_merge (xlator_t *this, struct iatt *to,
struct iatt *from, xlator_t *subvol)
@@ -489,9 +603,12 @@ dht_iatt_merge (xlator_t *this, struct iatt *to,
set_if_greater (to->ia_uid, from->ia_uid);
set_if_greater (to->ia_gid, from->ia_gid);
- set_if_greater (to->ia_atime, from->ia_atime);
- set_if_greater (to->ia_mtime, from->ia_mtime);
- set_if_greater (to->ia_ctime, from->ia_ctime);
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
+ from->ia_atime, from->ia_atime_nsec);
+ set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
+ from->ia_mtime, from->ia_mtime_nsec);
+ set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
+ from->ia_ctime, from->ia_ctime_nsec);
return 0;
}
@@ -617,20 +734,36 @@ dht_migration_complete_check_task (void *data)
call_frame_t *frame = NULL;
loc_t tmp_loc = {0,};
char *path = NULL;
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ uint64_t tmp_subvol = 0;
+ int open_failed = 0;
this = THIS;
frame = data;
local = frame->local;
+ conf = this->private;
src_node = local->cached_subvol;
- /* getxattr on cached_subvol for 'linkto' value */
- if (!local->loc.inode)
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+
+ if (!local->loc.inode) {
ret = syncop_fgetxattr (src_node, local->fd, &dict,
- DHT_LINKFILE_KEY);
- else
+ conf->link_xattr_name);
+ } else {
+ SYNCTASK_SETID (0, 0);
ret = syncop_getxattr (src_node, &local->loc, &dict,
- DHT_LINKFILE_KEY);
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ }
if (!ret)
dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
@@ -681,10 +814,7 @@ dht_migration_complete_check_task (void *data)
/* update inode ctx (the layout) */
dht_layout_unref (this, local->layout);
- if (!local->loc.inode)
- ret = dht_layout_preset (this, dst_node, local->fd->inode);
- else
- ret = dht_layout_preset (this, dst_node, local->loc.inode);
+ ret = dht_layout_preset (this, dst_node, inode);
if (ret != 0) {
gf_log (this->name, GF_LOG_DEBUG,
"%s: could not set preset layout for subvol %s",
@@ -702,10 +832,7 @@ dht_migration_complete_check_task (void *data)
goto out;
}
- if (!local->loc.inode)
- ret = dht_layout_set (this, local->fd->inode, layout);
- else
- ret = dht_layout_set (this, local->loc.inode, layout);
+ ret = dht_layout_set (this, inode, layout);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to set the new layout",
@@ -716,43 +843,46 @@ dht_migration_complete_check_task (void *data)
local->cached_subvol = dst_node;
ret = 0;
- if (!local->fd)
+ /* once we detect the migration complete, the inode-ctx2 is no more
+ required.. delete the ctx and also, it means, open() already
+ done on all the fd of inode */
+ ret = inode_ctx_reset1 (inode, this, &tmp_subvol);
+ if (tmp_subvol)
goto out;
- /* once we detect the migration complete, the fd-ctx is no more
- required.. delete the ctx, and do one extra 'fd_unref' for open fd */
- ret = fd_ctx_del (local->fd, this, NULL);
- if (!ret) {
- fd_unref (local->fd);
- ret = 0;
+ if (list_empty (&inode->fd_list))
goto out;
- }
- /* if 'local->fd' (ie, fd based operation), send a 'open()' on
- destination if not already done */
- if (local->loc.inode) {
- ret = syncop_open (dst_node, &local->loc,
- local->fd->flags, local->fd);
- } else {
- tmp_loc.inode = local->fd->inode;
- inode_path (local->fd->inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
- ret = syncop_open (dst_node, &tmp_loc,
- local->fd->flags, local->fd);
- if (path)
- GF_FREE (path);
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* perform 'open()' on all the fd's present on the inode */
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+ ret = syncop_open (dst_node, &tmp_loc,
+ iter_fd->flags, iter_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ }
}
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to send open() on target file at %s",
- local->loc.path, dst_node->name);
+ GF_FREE (path);
+
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ if (open_failed) {
+ ret = -1;
goto out;
}
-
- /* need this unref for the fd on src_node */
- fd_unref (local->fd);
ret = 0;
out:
@@ -763,11 +893,8 @@ int
dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
{
int ret = -1;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
- ret = synctask_new (conf->env, dht_migration_complete_check_task,
+ ret = synctask_new (this->ctx->env, dht_migration_complete_check_task,
dht_migration_complete_check_done,
frame, frame);
return ret;
@@ -799,20 +926,34 @@ dht_rebalance_inprogress_task (void *data)
char *path = NULL;
struct iatt stbuf = {0,};
loc_t tmp_loc = {0,};
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ int open_failed = 0;
this = THIS;
frame = data;
local = frame->local;
+ conf = this->private;
src_node = local->cached_subvol;
- /* getxattr on cached_subvol for 'linkto' value */
- if (local->loc.inode)
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+ if (local->loc.inode) {
+ SYNCTASK_SETID (0, 0);
ret = syncop_getxattr (src_node, &local->loc, &dict,
- DHT_LINKFILE_KEY);
- else
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ } else {
ret = syncop_fgetxattr (src_node, local->fd, &dict,
- DHT_LINKFILE_KEY);
+ conf->link_xattr_name);
+ }
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -854,34 +995,46 @@ dht_rebalance_inprogress_task (void *data)
ret = 0;
- if (!local->fd)
- goto out;
+ if (list_empty (&inode->fd_list))
+ goto done;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID (0, 0);
+
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
- if (local->loc.inode) {
- ret = syncop_open (dst_node, &local->loc,
- local->fd->flags, local->fd);
- } else {
- tmp_loc.inode = local->fd->inode;
- inode_path (local->fd->inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
ret = syncop_open (dst_node, &tmp_loc,
- local->fd->flags, local->fd);
- if (path)
- GF_FREE (path);
+ iter_fd->flags, iter_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to send open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ }
}
+ GF_FREE (path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to send open() on target file at %s",
- local->loc.path, dst_node->name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+
+ if (open_failed) {
+ ret = -1;
goto out;
}
- ret = fd_ctx_set (local->fd, this, (uint64_t)(long)dst_node);
+done:
+ ret = dht_inode_ctx_set1 (this, inode, dst_node);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set fd-ctx target file at %s",
+ "%s: failed to set inode-ctx target file at %s",
local->loc.path, dst_node->name);
goto out;
}
@@ -896,12 +1049,99 @@ dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
{
int ret = -1;
- dht_conf_t *conf = NULL;
- conf = this->private;
-
- ret = synctask_new (conf->env, dht_rebalance_inprogress_task,
+ ret = synctask_new (this->ctx->env, dht_rebalance_inprogress_task,
dht_inprogress_check_done,
frame, frame);
return ret;
}
+
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+ if (!ret && ctx) {
+ ctx->layout = layout_int;
+ } else {
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return ret;
+ ctx->layout = layout_int;
+ }
+
+ ret = dht_inode_ctx_set (inode, this, ctx);
+
+ return ret;
+}
+
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t post)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, stat, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (ret) {
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return -1;
+ }
+
+ time = &ctx->time;
+
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec,
+ stat->ia_mtime, stat->ia_mtime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec,
+ stat->ia_ctime, stat->ia_ctime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec,
+ stat->ia_atime, stat->ia_atime_nsec, inode, post);
+
+ ret = dht_inode_ctx_set (inode, this, ctx);
+out:
+ return 0;
+}
+
+int
+dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = inode_ctx_get (inode, this, &ctx_int);
+
+ if (ret)
+ return ret;
+
+ if (ctx)
+ *ctx = (dht_inode_ctx_t *) ctx_int;
+out:
+ return ret;
+}
+
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+ ctx_int = (long)ctx;
+ ret = inode_ctx_set (inode, this, &ctx_int);
+out:
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 7352e47bd..ece84151a 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -34,7 +25,7 @@ int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
int
dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -61,7 +52,7 @@ dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
out:
- DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd);
+ DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
return 0;
}
@@ -86,17 +77,17 @@ dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
&local->loc, local->rebalance.flags, local->fd,
- local->rebalance.wbflags);
+ NULL);
return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd, int wbflags)
+ loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -120,30 +111,30 @@ dht_open (call_frame_t *frame, xlator_t *this,
goto err;
}
- local->rebalance.wbflags = wbflags;
local->rebalance.flags = flags;
local->call_cnt = 1;
STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf)
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
{
- uint64_t tmp_subvol = 0;
+ xlator_t *subvol = 0;
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
+ inode_t *inode = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -164,26 +155,28 @@ dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->call_cnt != 1)
goto out;
+ local->op_errno = op_errno;
/* Check if the rebalance phase2 is true */
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (ret) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
/* Phase 2 of migration */
local->rebalance.target_op_fn = dht_attr2;
ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
} else {
/* value is already set in fd_ctx, that means no need
to check for whether its complete or not. */
dht_attr2 (this, frame, 0);
- }
- if (!ret)
return 0;
+ }
}
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf);
+ DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
err:
return 0;
}
@@ -208,21 +201,21 @@ dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
if (local->fop == GF_FOP_FSTAT) {
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, local->fd);
+ subvol->fops->fstat, local->fd, NULL);
} else {
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, &local->loc);
+ subvol->fops->stat, &local->loc, NULL);
}
return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf)
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -257,14 +250,14 @@ out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- &local->stbuf);
+ &local->stbuf, xdata);
}
err:
return 0;
}
int
-dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -300,7 +293,7 @@ dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, loc);
+ subvol->fops->stat, loc, xdata);
return 0;
}
@@ -312,21 +305,21 @@ dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
STACK_WIND (frame, dht_attr_cbk,
subvol, subvol->fops->stat,
- loc);
+ loc, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -360,7 +353,7 @@ dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, fd);
+ subvol->fops->fstat, fd, xdata);
return 0;
}
@@ -371,14 +364,14 @@ dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
subvol = layout->list[i].xlator;
STACK_WIND (frame, dht_attr_cbk,
subvol, subvol->fops->fstat,
- fd);
+ fd, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -387,10 +380,12 @@ int
dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
dht_local_t *local = NULL;
int ret = 0;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
local = frame->local;
if (!local) {
@@ -406,25 +401,27 @@ dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno != ENOENT))
goto out;
+ local->op_errno = op_errno;
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
/* File would be migrated to other node */
- ret = fd_ctx_get (local->fd, this, NULL);
- if (ret) {
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
local->rebalance.target_op_fn = dht_readv2;
ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
} else {
/* value is already set in fd_ctx, that means no need
to check for whether its complete or not. */
dht_readv2 (this, frame, 0);
- }
- if (!ret)
return 0;
+ }
}
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
+ iobref, xdata);
return 0;
}
@@ -448,18 +445,19 @@ dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
- local->fd, local->rebalance.size, local->rebalance.offset);
+ local->fd, local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, NULL);
return 0;
out:
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
int
dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+ fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -485,35 +483,57 @@ dht_readv (call_frame_t *frame, xlator_t *this,
local->rebalance.offset = off;
local->rebalance.size = size;
+ local->rebalance.flags = flags;
local->call_cnt = 1;
STACK_WIND (frame, dht_readv_cbk,
subvol, subvol->fops->readv,
- fd, size, off);
+ fd, size, off, flags, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
int
dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
int ret = -1;
dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
local = frame->local;
+ prev = cookie;
+ if (!prev || !prev->this)
+ goto out;
if (local->call_cnt != 1)
goto out;
+ if ((op_ret == -1) && (op_errno == ENOTCONN) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+
+ subvol = dht_subvol_next_available (this, prev->this);
+ if (!subvol)
+ goto out;
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+ return 0;
+ }
if ((op_ret == -1) && (op_errno == ENOENT)) {
/* File would be migrated to other node */
+ local->op_errno = op_errno;
local->rebalance.target_op_fn = dht_access2;
ret = dht_rebalance_complete_check (frame->this, frame);
if (!ret)
@@ -521,7 +541,7 @@ dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
out:
- DHT_STACK_UNWIND (access, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -544,18 +564,19 @@ dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- &local->loc, local->rebalance.flags);
+ &local->loc, local->rebalance.flags, NULL);
return 0;
out:
- DHT_STACK_UNWIND (access, frame, -1, op_errno);
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
return 0;
}
int
-dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -584,13 +605,13 @@ dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
}
STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (access, frame, -1, op_errno);
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
return 0;
}
@@ -598,10 +619,11 @@ err:
int
dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
local = frame->local;
@@ -611,14 +633,14 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
/* If context is set, then send flush() it to the destination */
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (subvol) {
dht_flush2 (this, frame, 0);
return 0;
}
out:
- DHT_STACK_UNWIND (flush, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -628,14 +650,10 @@ dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
local = frame->local;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
@@ -643,20 +661,19 @@ dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
local->call_cnt = 2; /* This is the second attempt */
STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, local->fd);
+ subvol, subvol->fops->flush, local->fd, NULL);
return 0;
}
int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -678,13 +695,13 @@ dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
local->call_cnt = 1;
STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, fd);
+ subvol, subvol->fops->flush, fd, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (flush, frame, -1, op_errno);
+ DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -692,17 +709,20 @@ err:
int
dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf)
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
local = frame->local;
prev = cookie;
local->op_errno = op_errno;
- if (op_ret == -1) {
+ if (op_ret == -1 && (op_errno != ENOENT)) {
gf_log (this->name, GF_LOG_DEBUG,
"subvolume %s returned -1 (%s)",
prev->this->name, strerror (op_errno));
@@ -717,8 +737,9 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
- ret = fd_ctx_get (local->fd, this, NULL);
- if (ret) {
+ local->op_errno = op_errno;
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
local->rebalance.target_op_fn = dht_fsync2;
/* Check if the rebalance phase1 is true */
@@ -733,17 +754,18 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
ret = dht_rebalance_complete_check (this, frame);
}
+ if (!ret)
+ return 0;
} else {
dht_fsync2 (this, frame, 0);
- }
- if (!ret)
return 0;
+ }
out:
DHT_STRIP_PHASE1_FLAGS (postbuf);
DHT_STRIP_PHASE1_FLAGS (prebuf);
DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
@@ -753,34 +775,29 @@ dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
local = frame->local;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
-
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
local->call_cnt = 2; /* This is the second attempt */
STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- local->fd, local->rebalance.flags);
+ local->fd, local->rebalance.flags, NULL);
return 0;
}
int
-dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -798,13 +815,13 @@ dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -815,9 +832,9 @@ err:
phase 2 of migration */
int
dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct gf_flock *flock)
+ int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
{
- DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock);
+ DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
return 0;
}
@@ -825,7 +842,7 @@ dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct gf_flock *flock)
+ fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -845,13 +862,13 @@ dht_lk (call_frame_t *frame, xlator_t *this,
/* TODO: for rebalance, we need to preserve the fop arguments */
STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
- cmd, flock);
+ cmd, flock, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -859,7 +876,8 @@ err:
/* Symlinks are currently not migrated, so no need for any check here */
int
dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path, struct iatt *stbuf)
+ int op_ret, int op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
@@ -874,14 +892,15 @@ dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
err:
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf);
+ DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
return 0;
}
int
-dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -909,13 +928,13 @@ dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
STACK_WIND (frame, dht_readlink_cbk,
subvol, subvol->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -926,16 +945,16 @@ err:
int
dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict);
+ DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -966,13 +985,13 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame,
dht_xattrop_cbk,
subvol, subvol->fops->xattrop,
- loc, flags, dict);
+ loc, flags, dict, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -980,16 +999,16 @@ err:
int
dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict);
+ DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -1009,13 +1028,13 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame,
dht_fxattrop_cbk,
subvol, subvol->fops->fxattrop,
- fd, flags, dict);
+ fd, flags, dict, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1023,17 +1042,17 @@ err:
int
dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
+dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -1065,31 +1084,31 @@ dht_inodelk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame,
dht_inodelk_cbk,
subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock);
+ volume, loc, cmd, lock, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (inodelk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
return 0;
}
int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
-dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
+dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -1107,16 +1126,14 @@ dht_finodelk (call_frame_t *frame, xlator_t *this,
}
- STACK_WIND (frame,
- dht_finodelk_cbk,
- subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock);
+ STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (finodelk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index 4f65b4671..4b3f3a049 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -28,16 +19,20 @@
int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret);
int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret);
int
dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
int ret = -1;
+ xlator_t *subvol = NULL;
- if (op_ret == -1) {
+ if (op_ret == -1 && (op_errno != ENOENT)) {
goto out;
}
@@ -59,6 +54,7 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->rebalance.target_op_fn = dht_writev2;
+ local->op_errno = op_errno;
/* Phase 2 of migration */
if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
ret = dht_rebalance_complete_check (this, frame);
@@ -71,8 +67,8 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
+ ret = dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
dht_writev2 (this, frame, 0);
return 0;
}
@@ -85,7 +81,8 @@ out:
DHT_STRIP_PHASE1_FLAGS (postbuf);
DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -95,14 +92,10 @@ dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
local = frame->local;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
@@ -112,15 +105,16 @@ dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
STACK_WIND (frame, dht_writev_cbk,
subvol, subvol->fops->writev,
local->fd, local->rebalance.vector, local->rebalance.count,
- local->rebalance.offset, local->rebalance.iobref);
+ local->rebalance.offset, local->rebalance.flags,
+ local->rebalance.iobref, NULL);
return 0;
}
int
-dht_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int count, off_t off,
- struct iobref *iobref)
+dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -149,18 +143,19 @@ dht_writev (call_frame_t *frame, xlator_t *this,
local->rebalance.vector = iov_dup (vector, count);
local->rebalance.offset = off;
local->rebalance.count = count;
+ local->rebalance.flags = flags;
local->rebalance.iobref = iobref_ref (iobref);
local->call_cnt = 1;
STACK_WIND (frame, dht_writev_cbk,
subvol, subvol->fops->writev,
- fd, vector, count, off, iobref);
+ fd, vector, count, off, flags, iobref, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -170,11 +165,13 @@ err:
int
dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -204,6 +201,7 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->rebalance.target_op_fn = dht_truncate2;
+ local->op_errno = op_errno;
/* Phase 2 of migration */
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
ret = dht_rebalance_complete_check (this, frame);
@@ -215,8 +213,9 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (subvol) {
dht_truncate2 (this, frame, 0);
return 0;
}
@@ -229,7 +228,7 @@ out:
DHT_STRIP_PHASE1_FLAGS (postbuf);
DHT_STRIP_PHASE1_FLAGS (prebuf);
DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
err:
return 0;
}
@@ -240,16 +239,13 @@ dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ inode_t *inode = NULL;
local = frame->local;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ inode = local->fd ? local->fd->inode : local->loc.inode;
+ dht_inode_ctx_get1 (this, inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
@@ -258,18 +254,19 @@ dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
if (local->fop == GF_FOP_TRUNCATE) {
STACK_WIND (frame, dht_truncate_cbk, subvol,
subvol->fops->truncate, &local->loc,
- local->rebalance.offset);
+ local->rebalance.offset, NULL);
} else {
STACK_WIND (frame, dht_truncate_cbk, subvol,
subvol->fops->ftruncate, local->fd,
- local->rebalance.offset);
+ local->rebalance.offset, NULL);
}
return 0;
}
int
-dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -299,19 +296,20 @@ dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
STACK_WIND (frame, dht_truncate_cbk,
subvol, subvol->fops->truncate,
- loc, offset);
+ loc, offset, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int
-dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -339,22 +337,423 @@ dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
STACK_WIND (frame, dht_truncate_cbk,
subvol, subvol->fops->ftruncate,
- fd, offset);
+ fd, offset, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_fallocate2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_fallocate2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate,
+ local->fd, local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, NULL);
+
+ return 0;
+}
+
+int
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mode;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_fallocate_cbk,
+ subvol, subvol->fops->fallocate,
+ fd, mode, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_discard2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_discard2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (discard, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_discard_cbk, subvol, subvol->fops->discard,
+ local->fd, local->rebalance.offset, local->rebalance.size,
+ NULL);
+
+ return 0;
+}
+
+int
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_DISCARD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_discard_cbk, subvol, subvol->fops->discard,
+ fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_zerofill2;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (!ret) {
+ dht_zerofill2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (local->fd)
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+ local->fd, local->rebalance.offset, local->rebalance.size,
+ NULL);
+
+ return 0;
+}
+
+int
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+ fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
/* handle cases of migration here for 'setattr()' calls */
int
dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -391,7 +790,7 @@ out:
DHT_STRIP_PHASE1_FLAGS (postbuf);
DHT_STRIP_PHASE1_FLAGS (prebuf);
DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
@@ -401,15 +800,13 @@ dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ inode_t *inode = NULL;
local = frame->local;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get1 (this, inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
@@ -419,11 +816,13 @@ dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
if (local->fop == GF_FOP_SETATTR) {
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->setattr, &local->loc,
- &local->rebalance.stbuf, local->rebalance.flags);
+ &local->rebalance.stbuf, local->rebalance.flags,
+ NULL);
} else {
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->fsetattr, local->fd,
- &local->rebalance.stbuf, local->rebalance.flags);
+ &local->rebalance.stbuf, local->rebalance.flags,
+ NULL);
}
return 0;
@@ -434,7 +833,7 @@ dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
int
dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -465,7 +864,7 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf);
+ &local->prebuf, &local->stbuf, xdata);
return 0;
}
@@ -473,7 +872,7 @@ unlock:
int
dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_layout_t *layout = NULL;
@@ -519,7 +918,7 @@ dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
@@ -530,14 +929,14 @@ dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame, dht_setattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -545,7 +944,7 @@ err:
int
dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_layout_t *layout = NULL;
@@ -590,7 +989,7 @@ dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
@@ -601,14 +1000,14 @@ dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
STACK_WIND (frame, dht_setattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 3c8f63f5b..38e9970a7 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -68,9 +59,7 @@ dht_layout_t *
dht_layout_get (xlator_t *this, inode_t *inode)
{
dht_conf_t *conf = NULL;
- uint64_t layout_int = 0;
dht_layout_t *layout = NULL;
- int ret = -1;
conf = this->private;
if (!conf)
@@ -78,9 +67,8 @@ dht_layout_get (xlator_t *this, inode_t *inode)
LOCK (&conf->layout_lock);
{
- ret = inode_ctx_get (inode, this, &layout_int);
- if (ret == 0) {
- layout = (dht_layout_t *) (unsigned long) layout_int;
+ dht_inode_ctx_layout_get (inode, this, &layout);
+ if (layout) {
layout->ref++;
}
}
@@ -98,7 +86,6 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
int oldret = -1;
int ret = 0;
dht_layout_t *old_layout;
- uint64_t old_layout_int;
conf = this->private;
if (!conf)
@@ -106,16 +93,13 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
LOCK (&conf->layout_lock);
{
- oldret = inode_ctx_get (inode, this, &old_layout_int);
-
+ oldret = dht_inode_ctx_layout_get (inode, this, &old_layout);
layout->ref++;
- ret = inode_ctx_put (inode, this, (uint64_t) (unsigned long)
- layout);
+ dht_inode_ctx_layout_set (inode, this, layout);
}
UNLOCK (&conf->layout_lock);
- if (oldret == 0) {
- old_layout = (dht_layout_t *) (unsigned long) old_layout_int;
+ if (!oldret) {
dht_layout_unref (this, old_layout);
}
@@ -130,7 +114,7 @@ dht_layout_unref (xlator_t *this, dht_layout_t *layout)
dht_conf_t *conf = NULL;
int ref = 0;
- if (layout->preset || !this->private)
+ if (!layout || layout->preset || !this->private)
return;
conf = this->private;
@@ -174,9 +158,9 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
int ret = 0;
- ret = dht_hash_compute (layout->type, name, &hash);
+ ret = dht_hash_compute (this, layout->type, name, &hash);
if (ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"hash computation failed for type=%d name=%s",
layout->type, name);
goto out;
@@ -191,7 +175,7 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
}
if (!subvol) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"no subvolume for hash (value) = %u", hash);
}
@@ -280,6 +264,9 @@ dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
if (disk_layout_p)
*disk_layout_p = disk_layout;
+ else
+ GF_FREE (disk_layout);
+
ret = 0;
out:
@@ -309,14 +296,18 @@ dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
cnt = ntoh32 (disk_layout[0]);
if (cnt != 1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_ERROR,
"disk layout has invalid count %d", cnt);
return -1;
}
- switch (disk_layout[1]) {
+ type = ntoh32 (disk_layout[1]);
+ switch (type) {
+ case DHT_HASH_TYPE_DM_USER:
+ gf_log (this->name, GF_LOG_DEBUG, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
case DHT_HASH_TYPE_DM:
- type = ntoh32 (disk_layout[1]);
break;
default:
gf_log (this->name, GF_LOG_CRITICAL,
@@ -344,11 +335,12 @@ int
dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
int op_ret, int op_errno, dict_t *xattr)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- void *disk_layout_raw = NULL;
- int disk_layout_len = 0;
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
if (op_ret != 0) {
err = op_errno;
@@ -369,12 +361,12 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (xattr) {
/* during lookup and not mkdir */
- ret = dict_get_ptr_and_len (xattr, "trusted.glusterfs.dht",
+ ret = dict_get_ptr_and_len (xattr, conf->xattr_name,
&disk_layout_raw, &disk_layout_len);
}
if (ret != 0) {
- layout->list[i].err = -1;
+ layout->list[i].err = 0;
gf_log (this->name, GF_LOG_TRACE,
"missing disk layout on %s. err = %d",
subvol->name, err);
@@ -385,7 +377,7 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
disk_layout_len);
if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"layout merge from subvolume %s failed",
subvol->name);
goto out;
@@ -421,6 +413,22 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
layout->list[j].err = err_swap;
}
+void
+dht_layout_range_swap (dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+}
+
int64_t
dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
{
@@ -428,17 +436,37 @@ dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
layout->list[j].xlator->name));
}
+
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator)
+{
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, xlator->name))
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
int64_t
dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
{
int64_t diff = 0;
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t) layout->list[i].stop
+ - (int64_t) layout->list[j].stop;
+ goto out;
+ }
if (layout->list[i].err || layout->list[j].err)
diff = layout->list[i].err - layout->list[j].err;
else
diff = (int64_t) layout->list[i].start
- (int64_t) layout->list[j].start;
+out:
return diff;
}
@@ -487,7 +515,8 @@ dht_layout_sort_volname (dht_layout_t *layout)
int
dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
uint32_t overlaps = 0;
uint32_t missing = 0;
@@ -500,6 +529,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t prev_stop = 0;
uint32_t last_stop = 0;
char is_virgin = 1;
+ uint32_t no_space = 0;
/* TODO: explain what is happening */
@@ -507,23 +537,30 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
prev_stop = last_stop;
for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- missing++;
- break;
- case ENOTCONN:
- down++;
- break;
- case ENOSPC:
- down++;
- break;
- default:
- misc++;
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a non misc++;
+ * participating subvolume(spread-cnt). Then, do not
+ * check for anomalies. If start != stop, then treat it
+ * as misc err */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
}
+ break;
+ default:
+ misc++;
continue;
- }
+ }
is_virgin = 0;
@@ -556,6 +593,9 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
if (misc_p)
*misc_p = misc;
+ if (no_space_p)
+ *no_space_p = no_space;
+
return ret;
}
@@ -571,7 +611,6 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
uint32_t down = 0;
uint32_t misc = 0;
-
ret = dht_layout_sort (layout);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
@@ -581,7 +620,7 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
ret = dht_layout_anomalies (this, loc, layout,
&holes, &overlaps,
- &missing, &down, &misc);
+ &missing, &down, &misc, NULL);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"error while finding anomalies in %s -- not good news",
@@ -599,43 +638,56 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
"found anomalies in %s. holes=%d overlaps=%d",
loc->path, holes, overlaps);
}
- ret = 1;
+ ret = -1;
}
for (i = 0; i < layout->cnt; i++) {
- /* TODO During DHT selfheal rewrite (almost) find a better place to
- * detect this - probably in dht_layout_anomalies()
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
*/
if (layout->list[i].err > 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "path=%s err=%s on subvol=%s",
- loc->path, strerror (layout->list[i].err),
- (layout->list[i].xlator ?
- layout->list[i].xlator->name : "<>"));
- if (layout->list[i].err == ENOENT)
- ret = 1;
+ gf_log_callingfn (this->name, GF_LOG_DEBUG,
+ "path=%s err=%s on subvol=%s",
+ loc->path,
+ strerror (layout->list[i].err),
+ (layout->list[i].xlator ?
+ layout->list[i].xlator->name
+ : "<>"));
+ if ((layout->list[i].err == ENOENT) && (ret >= 0)) {
+ ret++;
+ }
}
}
+
out:
return ret;
}
+int
+dht_dir_has_layout (dict_t *xattr, char *name)
+{
+
+ void *disk_layout_raw = NULL;
+
+ return dict_get_ptr (xattr, name, &disk_layout_raw);
+}
int
dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
loc_t *loc, dict_t *xattr)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t disk_layout[4];
- void *disk_layout_raw = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ int32_t count = -1;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ dht_conf_t *conf = this->private;
for (idx = 0; idx < layout->cnt; idx++) {
@@ -665,7 +717,7 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
goto out;
}
- dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
+ dict_ret = dict_get_ptr (xattr, conf->xattr_name,
&disk_layout_raw);
if (dict_ret < 0) {
@@ -681,7 +733,7 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
count = ntoh32 (disk_layout[0]);
if (count != 1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_ERROR,
"%s - disk layout has invalid count %d",
loc->path, count);
ret = -1;
@@ -730,7 +782,7 @@ dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
LOCK (&conf->layout_lock);
{
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ dht_inode_ctx_layout_set (inode, this, layout);
}
UNLOCK (&conf->layout_lock);
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 2186b064a..dbc9d0b3c 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -28,37 +19,106 @@
#include "compat.h"
#include "dht-common.h"
+int
+dht_linkfile_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_log (this->name, GF_LOG_WARNING, "got non-linkfile %s:%s",
+ prev->this->name, local->loc.path);
+out:
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ inode, stbuf, postparent, postparent,
+ xattr);
+ return 0;
+}
+#define is_equal(a, b) (a == b)
int
dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
local = frame->local;
+ if (!op_ret)
+ local->linked = _gf_true;
+
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new ();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set linkto key");
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_linkfile_lookup_cbk, subvol,
+ subvol->fops->lookup, &local->loc, xattrs);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+ }
+out:
local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent,
+ xdata);
+ if (xattrs)
+ dict_unref (xattrs);
return 0;
}
int
dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this,
xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
{
dht_local_t *local = NULL;
dict_t *dict = NULL;
int need_unref = 0;
int ret = 0;
+ dht_conf_t *conf = this->private;
local = frame->local;
local->linkfile.linkfile_cbk = linkfile_cbk;
local->linkfile.srcvol = tovol;
+ local->linked = _gf_false;
+
dict = local->params;
if (!dict) {
dict = dict_new ();
@@ -74,8 +134,12 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
"%s: gfid set failed", loc->path);
}
- ret = dict_set_str (dict, "trusted.glusterfs.dht.linkto",
- tovol->name);
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log ("dht-linkfile", GF_LOG_INFO,
+ "%s: internal-fop set failed", loc->path);
+
+ ret = dict_set_str (dict, conf->link_xattr_name, tovol->name);
if (ret < 0) {
gf_log (frame->this->name, GF_LOG_INFO,
@@ -84,9 +148,13 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
goto out;
}
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownsership */
+ FRAME_SU_DO (frame, dht_local_t);
STACK_WIND (frame, dht_linkfile_create_cbk,
fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0, dict);
+ S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
if (need_unref && dict)
dict_unref (dict);
@@ -94,7 +162,7 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
return 0;
out:
local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
- loc->inode, NULL, NULL, NULL);
+ loc->inode, NULL, NULL, NULL, NULL);
if (need_unref && dict)
dict_unref (dict);
@@ -106,7 +174,8 @@ out:
int
dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -150,7 +219,7 @@ dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
subvol, subvol->fops->unlink,
- &unlink_local->loc);
+ &unlink_local->loc, 0, NULL);
return 0;
err:
@@ -175,7 +244,7 @@ dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
if (!xattr)
goto out;
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+ ret = dict_get_ptr (xattr, conf->link_xattr_name, &volname);
if ((-1 == ret) || !volname)
goto out;
@@ -190,3 +259,70 @@ dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
out:
return subvol;
}
+
+int
+dht_linkfile_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret)
+ gf_log (this->name, GF_LOG_ERROR, "setattr of uid/gid on %s"
+ " :<gfid:%s> failed (%s)",
+ (loc->path? loc->path: "NULL"),
+ uuid_utoa(local->gfid), strerror(op_errno));
+
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this)
+{
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {0,};
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
+
+ uuid_copy (local->loc.gfid, local->stbuf.ia_gfid);
+
+ copy = copy_frame (frame);
+
+ if (!copy)
+ goto out;
+
+ copy_local = dht_local_init (copy, &local->loc, NULL, 0);
+
+ if (!copy_local)
+ goto out;
+
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
+
+ copy->local = copy_local;
+
+ FRAME_SU_DO (copy, dht_local_t);
+
+ STACK_WIND (copy, dht_linkfile_setattr_cbk, subvol,
+ subvol->fops->setattr, &copy_local->loc,
+ &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 21fb5a7ca..e893eb48f 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -28,7 +19,6 @@ enum gf_dht_mem_types_ {
gf_dht_mt_dht_conf_t,
gf_dht_mt_char,
gf_dht_mt_int32_t,
- gf_dht_mt_dht_local_t,
gf_dht_mt_xlator_t,
gf_dht_mt_dht_layout_t,
gf_switch_mt_dht_conf_t,
@@ -37,6 +27,9 @@ enum gf_dht_mem_types_ {
gf_switch_mt_switch_struct,
gf_dht_mt_subvol_time,
gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_ctx_stat_time_t,
gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 70950e83e..bcb19f23e 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -24,11 +15,12 @@
#endif
#include "dht-common.h"
+#include "xlator.h"
+#include <fnmatch.h>
#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
#define DHT_REBALANCE_BLKSIZE (128 * 1024)
-#define DHT_MIGRATE_EVEN_IF_LINK_EXISTS 1
static int
dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
@@ -60,7 +52,7 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
ret = syncop_write (to, fd, (buf + tmp_offset),
(start_idx - tmp_offset),
(offset + tmp_offset),
- iobref);
+ iobref, 0);
/* 'path' will be logged in calling function */
if (ret < 0) {
gf_log (THIS->name, GF_LOG_WARNING,
@@ -78,7 +70,7 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
/* This means, last chunk is not yet written.. write it */
ret = syncop_write (to, fd, (buf + tmp_offset),
(buf_len - tmp_offset),
- (offset + tmp_offset), iobref);
+ (offset + tmp_offset), iobref, 0);
if (ret < 0) {
/* 'path' will be logged in calling function */
gf_log (THIS->name, GF_LOG_WARNING,
@@ -99,8 +91,118 @@ out:
}
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf)
+{
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this, out);
+ GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (uuid_is_null (loc->pargfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->gfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get cached subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get hashed subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
+ cached_subvol->name, hashed_subvol->name);
+ data = dict_get (xattrs, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str (xattrs, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "linkto xattr in dict for %s", loc->name);
+ goto out;
+ }
+
+ ret = syncop_setxattr (cached_subvol, loc, xattrs, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr "
+ "failed %s -> %s (%s)", cached_subvol->name,
+ loc->name, strerror (errno));
+ goto out;
+ }
+ goto out;
+ } else {
+ linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
+ if (!linkto_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "linkto subvol for %s", loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
+ }
+
+ ret = syncop_link (hashed_subvol, loc, loc);
+ if (ret) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s"
+ " failed on subvol %s (%s)", loc->name,
+ uuid_utoa(loc->gfid),
+ hashed_subvol->name, strerror (op_errno));
+ if (op_errno != EEXIST)
+ goto out;
+ }
+ }
+ ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)"
+ , loc->name, hashed_subvol->name, strerror (errno));
+ goto out;
+ }
+
+ if (iatt.ia_nlink == stbuf->ia_nlink) {
+ ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+
static inline int
-__is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf)
+__is_file_migratable (xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, dict_t *xattrs, int flags)
{
int ret = -1;
@@ -111,11 +213,25 @@ __is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf)
goto out;
}
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
+ goto out;
+ }
if (stbuf->ia_nlink > 1) {
- /* TODO : support migrating hardlinks */
- gf_log (this->name, GF_LOG_WARNING, "%s: file has hardlinks",
- loc->path);
- ret = -ENOTSUP;
+ /* support for decomission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ ret = gf_defrag_handle_hardlink (this, loc,
+ xattrs, stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: file has hardlinks", loc->path);
+ }
+ ret = ENOTSUP;
goto out;
}
@@ -127,14 +243,16 @@ out:
static inline int
__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
- dict_t *dict, fd_t **dst_fd)
+ dict_t *dict, fd_t **dst_fd, dict_t *xattr)
{
- xlator_t *this = NULL;
- int ret = -1;
- fd_t *fd = NULL;
- struct iatt new_stbuf = {0,};
+ xlator_t *this = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {0,};
+ dht_conf_t *conf = NULL;
this = THIS;
+ conf = this->private;
ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
if (ret) {
@@ -143,7 +261,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
goto out;
}
- ret = dict_set_str (dict, DHT_LINKFILE_KEY, from->name);
+ ret = dict_set_str (dict, conf->link_xattr_name, from->name);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to set gfid in dict for create", loc->path);
@@ -181,7 +299,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
/* Create the destination with LINKFILE mode, and linkto xattr,
if the linkfile already exists, it will just open the file */
ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
- dict);
+ dict, &new_stbuf);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to create %s on %s (%s)",
@@ -189,6 +307,26 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
goto out;
}
+ ret = syncop_fsetxattr (to, fd, xattr, 0);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set xattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ ret = syncop_ftruncate (to, fd, stbuf->ia_size);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "ftruncate failed for %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ ret = syncop_fsetattr (to, fd, stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "chown failed for %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
if (dst_fd)
*dst_fd = fd;
@@ -201,13 +339,16 @@ out:
static inline int
__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
- struct iatt *stbuf)
+ struct iatt *stbuf, int flag)
{
struct statvfs src_statfs = {0,};
struct statvfs dst_statfs = {0,};
int ret = -1;
xlator_t *this = NULL;
+ uint64_t src_statfs_blocks = 1;
+ uint64_t dst_statfs_blocks = 1;
+
this = THIS;
ret = syncop_statfs (from, loc, &src_statfs);
@@ -225,18 +366,47 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
loc->path, to->name, strerror (errno));
goto out;
}
- if (((dst_statfs.f_bavail *
- dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) >
- (((src_statfs.f_bavail * src_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE) - stbuf->ia_blocks)) {
- gf_log (this->name, GF_LOG_WARNING,
- "data movement attempted from node (%s) with"
- " higher disk space to a node (%s) with "
- "lesser disk space (%s)", from->name,
- to->name, loc->path);
- /* this is not a 'failure', but we don't want to
- consider this as 'success' too :-/ */
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
+ /* Check:
+ During rebalance `migrate-data` - Destination subvol experiences
+ a `reduction` in 'blocks' of free space, at the same time source
+ subvol gains certain 'blocks' of free space. A valid check is
+ necessary here to avoid errorneous move to destination where
+ the space could be scantily available.
+ */
+ if (stbuf) {
+ dst_statfs_blocks = ((dst_statfs.f_bavail *
+ dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ src_statfs_blocks = ((src_statfs.f_bavail *
+ src_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ if ((dst_statfs_blocks - stbuf->ia_blocks) <
+ (src_statfs_blocks + stbuf->ia_blocks)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "data movement attempted from node (%s) with"
+ " higher disk space to a node (%s) with "
+ "lesser disk space (%s)", from->name,
+ to->name, loc->path);
+
+ /* this is not a 'failure', but we don't want to
+ consider this as 'success' too :-/ */
+ ret = 1;
+ goto out;
+ }
+ }
+check_avail_space:
+ if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "data movement attempted from node (%s) with "
+ "to node (%s) which does not have required free space"
+ " for %s", from->name, to->name, loc->path);
ret = 1;
goto out;
}
@@ -263,7 +433,7 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst
read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
DHT_REBALANCE_BLKSIZE : (ia_size - total));
ret = syncop_readv (from, src, read_size,
- offset, &vector, &count, &iobref);
+ offset, 0, &vector, &count, &iobref);
if (!ret || (ret < 0)) {
break;
}
@@ -273,15 +443,14 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst
ret, offset, iobref);
else
ret = syncop_writev (to, dst, vector, count,
- offset, iobref);
+ offset, iobref, 0);
if (ret < 0) {
break;
}
offset += ret;
total += ret;
- if (vector)
- GF_FREE (vector);
+ GF_FREE (vector);
if (iobref)
iobref_unref (iobref);
iobref = NULL;
@@ -289,8 +458,7 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst
}
if (iobref)
iobref_unref (iobref);
- if (vector)
- GF_FREE (vector);
+ GF_FREE (vector);
if (ret >= 0)
ret = 0;
@@ -308,8 +476,10 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
dict_t *dict = NULL;
xlator_t *this = NULL;
struct iatt iatt = {0,};
+ dht_conf_t *conf = NULL;
this = THIS;
+ conf = this->private;
fd = fd_create (loc->inode, DHT_REBALANCE_PID);
if (!fd) {
@@ -332,7 +502,7 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
if (!dict)
goto out;
- ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name);
+ ret = dict_set_str (dict, conf->link_xattr_name, to->name);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"failed to set xattr in dict for %s (linkto:%s)",
@@ -385,12 +555,13 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
dict_t *dict = NULL;
char *link = NULL;
struct iatt stbuf = {0,};
+ dht_conf_t *conf = this->private;
dict = dict_new ();
if (!dict)
goto out;
- ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256);
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to set 'linkto' key in dict", loc->path);
@@ -406,12 +577,13 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
}
/* we no more require this key */
- dict_del (dict, DHT_LINKFILE_KEY);
+ dict_del (dict, conf->link_xattr_name);
/* file exists in target node, only if it is 'linkfile' its valid,
otherwise, error out */
if (!ret) {
- if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict)) {
+ if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict,
+ conf->link_xattr_name)) {
gf_log (this->name, GF_LOG_WARNING,
"%s: file exists in destination", loc->path);
ret = -1;
@@ -447,7 +619,7 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
goto out;
}
- ret = syncop_symlink (to, loc, link, dict);
+ ret = syncop_symlink (to, loc, link, dict, 0);
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: creating symlink failed (%s)",
@@ -461,7 +633,7 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
buf->ia_type),
makedev (ia_major (buf->ia_rdev),
- ia_minor (buf->ia_rdev)), dict);
+ ia_minor (buf->ia_rdev)), dict, 0);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)",
loc->path, strerror (errno));
@@ -469,6 +641,15 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
}
done:
+ ret = syncop_setattr (to, loc, buf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ }
+
ret = syncop_unlink (from, loc);
if (ret)
gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)",
@@ -504,7 +685,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
fd_t *dst_fd = NULL;
dict_t *dict = NULL;
dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
int file_has_holes = 0;
+ dht_conf_t *conf = this->private;
gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -513,19 +696,29 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (!dict)
goto out;
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
/* Phase 1 - Data migration is in progress from now on */
- ret = syncop_lookup (from, loc, NULL, &stbuf, NULL, NULL);
+ ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)",
loc->path, from->name, strerror (errno));
goto out;
}
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
/* preserve source mode, so set the same to the destination */
src_ia_prot = stbuf.ia_prot;
/* Check if file can be migrated */
- ret = __is_file_migratable (this, loc, &stbuf);
+ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
if (ret)
goto out;
@@ -536,18 +729,22 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr (from, loc, &xattr);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get xattr from %s (%s)",
+ loc->path, from->name, strerror (errno));
+
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
- dict, &dst_fd);
+ dict, &dst_fd, xattr);
if (ret)
goto out;
- /* Should happen on all files when 'force' option is not given */
- if (flag != DHT_MIGRATE_EVEN_IF_LINK_EXISTS) {
- ret = __dht_check_free_space (to, from, loc, &stbuf);
- if (ret) {
- goto out;
- }
+ ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
+ if (ret) {
+ goto out;
}
/* Open the source, and also update mode/xattr */
@@ -558,6 +755,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+
ret = syncop_fstat (from, src_fd, &stbuf);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)",
@@ -587,22 +785,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
- /* TODO: move all xattr related operations to fd based operations */
- ret = syncop_listxattr (from, loc, &xattr);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get xattr from %s (%s)",
- loc->path, from->name, strerror (errno));
-
- ret = syncop_setxattr (to, loc, xattr, 0);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set xattr on %s (%s)",
- loc->path, to->name, strerror (errno));
-
/* TODO: Sync the locks */
- ret = syncop_fsync (to, dst_fd);
+ ret = syncop_fsync (to, dst_fd, 0);
if (ret)
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to fsync on %s (%s)",
@@ -639,6 +824,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to perform setattr on %s (%s)",
loc->path, to->name, strerror (errno));
+ goto out;
}
/* Because 'futimes' is not portable */
@@ -659,6 +845,24 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
gf_log (this->name, GF_LOG_WARNING, \
"%s: failed to perform setattr on %s (%s)",
loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate (from, src_fd, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ }
+
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
}
/* Do a stat and check the gfid before unlink */
@@ -667,38 +871,23 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to do a stat on %s (%s)",
loc->path, from->name, strerror (errno));
+ goto out;
}
- if (uuid_compare (empty_iatt.ia_gfid, loc->inode->gfid) == 0) {
+ if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
/* take out the source from namespace */
ret = syncop_unlink (from, loc);
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to perform unlink on %s (%s)",
loc->path, from->name, strerror (errno));
+ goto out;
}
}
- /* Free up the data blocks on the source node, as the whole
- file is migrated */
- ret = syncop_ftruncate (from, src_fd, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform truncate on %s (%s)",
- loc->path, from->name, strerror (errno));
- }
-
- /* remove the 'linkto' xattr from the destination */
- ret = syncop_removexattr (to, loc, DHT_LINKFILE_KEY);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform removexattr on %s (%s)",
- loc->path, to->name, strerror (errno));
- }
-
ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s: failed to lookup the file on subvolumes (%s)",
loc->path, strerror (errno));
}
@@ -714,6 +903,8 @@ out:
if (xattr)
dict_unref (xattr);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
if (dst_fd)
syncop_close (dst_fd);
@@ -788,7 +979,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
op_errno = EPERM;
}
- DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
return 0;
}
@@ -796,12 +987,829 @@ int
dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
{
int ret = -1;
- dht_conf_t *conf = NULL;
- conf = this->private;
-
- ret = synctask_new (conf->env, rebalance_task,
+ ret = synctask_new (this->ctx->env, rebalance_task,
rebalance_task_completion,
frame, frame);
return ret;
}
+
+int
+gf_listener_stop (xlator_t *this)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ ctx = this->ctx;
+ GF_ASSERT (ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = unlink (cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ }
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
+ "socket %s, error: %s", cmd_args->sock_file,
+ strerror (errno));
+ }
+ return ret;
+}
+
+void
+dht_build_root_inode (xlator_t *this, inode_t **inode)
+{
+ inode_table_t *itable = NULL;
+ uuid_t root_gfid = {0, };
+
+ itable = inode_table_new (0, this);
+ if (!itable)
+ return;
+
+ root_gfid[15] = 1;
+ *inode = inode_find (itable, root_gfid);
+}
+
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc)
+{
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset (loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
+}
+
+
+/* return values: 1 -> error, bug ignore and continue
+ 0 -> proceed
+ -1 -> error, handle it */
+int32_t
+gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+{
+ /* if errno is not ENOSPC or ENOTCONN, we can still continue
+ with rebalance process */
+ if ((errno != ENOSPC) || (errno != ENOTCONN))
+ return 1;
+
+ if (errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ if (errno == ENOSPC) {
+ /* rebalance process itself failed, may be
+ remote brick went down, or write failed due to
+ disk full etc etc.. */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ return 0;
+}
+
+static gf_boolean_t
+gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size)
+{
+ gf_defrag_pattern_list_t *trav = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("dht", defrag, out);
+
+ trav = defrag->defrag_pattern;
+ while (trav) {
+ if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) {
+ match = _gf_true;
+ break;
+ }
+ trav = trav->next;
+ }
+
+ if ((match == _gf_true) && (size >= trav->size))
+ ret = _gf_true;
+
+ out:
+ return ret;
+}
+
+/* We do a depth first traversal of directories. But before we move into
+ * subdirs, we complete the data migration of those directories whose layouts
+ * have been fixed
+ */
+
+int
+gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ dict_t *dict = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ char *uuid_str = NULL;
+ uuid_t node_uuid = {0,};
+ int readdir_operrno = 0;
+ struct timeval dir_start = {0,};
+ struct timeval end = {0,};
+ double elapsed = {0,};
+ struct timeval start = {0,};
+ int32_t err = 0;
+
+ gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
+ loc->path);
+ gettimeofday (&dir_start, NULL);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0) {
+
+ if (ret < 0) {
+
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s."
+ " Aborting migrate-data",
+ strerror(readdir_operrno));
+ goto out;
+ }
+
+ /* Need to keep track of ENOENT errno, that means, there is no
+ need to send more readdirp() */
+ readdir_operrno = errno;
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ defrag->num_files_lookedup++;
+ if (defrag->stats == _gf_true) {
+ gettimeofday (&start, NULL);
+ }
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match (defrag, entry->d_name,
+ entry->d_stat.ia_size)
+ == _gf_false)) {
+ continue;
+ }
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_NODE_UUID_KEY);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid for %s", entry_loc.path);
+ continue;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
+ &uuid_str);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid from dict for %s",
+ entry_loc.path);
+ continue;
+ }
+
+ if (uuid_parse (uuid_str, node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "uuid_parse "
+ "failed for %s", entry_loc.path);
+ continue;
+ }
+
+ /* if file belongs to different node, skip migration
+ * the other node will take responsibility of migration
+ */
+ if (uuid_compare (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_TRACE, "%s does not"
+ "belong to this node", entry_loc.path);
+ continue;
+ }
+
+ uuid_str = NULL;
+
+ dict_del (dict, GF_XATTR_NODE_UUID_KEY);
+
+
+ /* if distribute is present, it will honor this key.
+ * -1 is returned if distribute is not present or file
+ * doesn't have a link-file. If file has link-file, the
+ * path of link-file will be the value, and also that
+ * guarantees that file has to be mostly migrated */
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_LINKINFO_KEY);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE, "failed to "
+ "get link-to key for %s",
+ entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, migrate_data,
+ 0);
+ if (ret) {
+ err = op_errno;
+ /* errno is overloaded. See
+ * rebalance_task_completion () */
+ if (err != ENOSPC) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data skipped for %s"
+ " due to space constraints",
+ entry_loc.path);
+ defrag->skipped +=1;
+ } else{
+ gf_log (this->name, GF_LOG_ERROR,
+ "migrate-data failed for %s",
+ entry_loc.path);
+ defrag->total_failures +=1;
+ }
+ }
+
+ if (ret == -1) {
+ op_errno = errno;
+ ret = gf_defrag_handle_migrate_error (op_errno,
+ defrag);
+
+ if (!ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data on %s failed: %s",
+ entry_loc.path,
+ strerror (op_errno));
+ else if (ret == 1)
+ continue;
+ else if (ret == -1)
+ goto out;
+ }
+
+ LOCK (&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK (&defrag->lock);
+ if (defrag->stats == _gf_true) {
+ gettimeofday (&end, NULL);
+ elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
+ (end.tv_usec - start.tv_usec);
+ gf_log (this->name, GF_LOG_INFO, "Migration of "
+ "file:%s size:%"PRIu64" bytes took %.2f"
+ "secs", entry_loc.path, iatt.ia_size,
+ elapsed/1e6);
+ }
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+
+ if (readdir_operrno == ENOENT)
+ break;
+ }
+
+ gettimeofday (&end, NULL);
+ elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
+ (end.tv_usec - dir_start.tv_usec);
+ gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took "
+ "%.2f secs", loc->path, elapsed/1e6);
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+ return ret;
+
+}
+
+
+int
+gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ dict_t *dict = NULL;
+ off_t offset = 0;
+ struct iatt iatt = {0,};
+ int readdirp_errno = 0;
+
+ ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
+ loc->path);
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data);
+ if (ret)
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s"
+ ". Aborting fix-layout",strerror(errno));
+ goto out;
+ }
+
+ /* Need to keep track of ENOENT errno, that means, there is no
+ need to send more readdirp() */
+ readdirp_errno = errno;
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (!IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, fix_layout,
+ 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setxattr "
+ "failed for %s", entry_loc.path);
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ defrag->total_failures ++;
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
+ fix_layout, migrate_data);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Fix layout "
+ "failed for %s", entry_loc.path);
+ defrag->total_failures++;
+ goto out;
+ }
+
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ if (readdirp_errno == ENOENT)
+ break;
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+
+}
+
+
+int
+gf_defrag_start_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = -1;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ struct iatt parent = {0,};
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+ dict_t *status = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = data;
+ if (!this)
+ goto out;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ gettimeofday (&defrag->start_time, NULL);
+ dht_build_root_inode (this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc (defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
+ goto out;
+ }
+
+ fix_layout = dict_new ();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
+ goto out;
+ }
+
+ ret = syncop_setxattr (this, &loc, fix_layout, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
+ loc.path);
+ defrag->total_failures++;
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ migrate_data = dict_new ();
+ if (!migrate_data) {
+ ret = -1;
+ goto out;
+ }
+ if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data", "force");
+ else
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data",
+ "non-force");
+ if (ret)
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+ migrate_data);
+ if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+ (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ }
+
+
+
+out:
+ LOCK (&defrag->lock);
+ {
+ status = dict_new ();
+ gf_defrag_status_get (defrag, status);
+ if (ctx->notify)
+ ctx->notify (GF_EN_DEFRAG_STATUS, status);
+ if (status)
+ dict_unref (status);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (defrag) {
+ GF_FREE (defrag);
+ conf->defrag = NULL;
+ }
+
+ return ret;
+}
+
+
+static int
+gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ gf_listener_stop (sync_frame->this);
+
+ STACK_DESTROY (sync_frame->root);
+ kill (getpid(), SIGTERM);
+ return 0;
+}
+
+void *
+gf_defrag_start (void *data)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag->pid = frame->root->pid;
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
+ gf_defrag_done, frame, this);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Could not create"
+ " task for rebalance");
+out:
+ return NULL;
+}
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ char *status = "";
+ double elapsed = 0;
+ struct timeval end = {0,};
+
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+ failures = defrag->total_failures;
+ skipped = defrag->skipped;
+
+ gettimeofday (&end, NULL);
+
+ elapsed = end.tv_sec - defrag->start_time.tv_sec;
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64 (dict, "files", files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set file count");
+
+ ret = dict_set_uint64 (dict, "size", size);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set size of xfer");
+
+ ret = dict_set_uint64 (dict, "lookups", lookup);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set lookedup file count");
+
+
+ ret = dict_set_int32 (dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set status");
+ if (elapsed) {
+ ret = dict_set_double (dict, "run-time", elapsed);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set run-time");
+ }
+
+ ret = dict_set_uint64 (dict, "failures", failures);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set failure count");
+
+ ret = dict_set_uint64 (dict, "skipped", skipped);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set skipped file count");
+log:
+ switch (defrag->defrag_status) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+ status = "not started";
+ break;
+ case GF_DEFRAG_STATUS_STARTED:
+ status = "in progress";
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ status = "stopped";
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ status = "completed";
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f "
+ "secs", status, elapsed);
+ gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
+ PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: "
+ "%"PRIu64, files, size, lookup, failures, skipped);
+
+
+out:
+ return 0;
+}
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output)
+{
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ GF_ASSERT (defrag);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_INFO, "Received stop command on rebalance");
+ defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+
+ if (output)
+ gf_defrag_status_get (defrag, output);
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 02b12887f..5d6f4f232 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
@@ -35,7 +26,8 @@ int
dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -84,7 +76,7 @@ unwind:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, xdata);
}
return 0;
@@ -97,7 +89,7 @@ dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *preoldparent,
struct iatt *postoldparent,
struct iatt *prenewparent,
- struct iatt *postnewparent)
+ struct iatt *postnewparent, dict_t *xdata)
{
dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
@@ -147,7 +139,7 @@ dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, dht_rename_dir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
if (!--call_cnt)
break;
}
@@ -164,7 +156,7 @@ unwind:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -185,19 +177,20 @@ dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
STACK_WIND (frame, dht_rename_hashed_dir_cbk,
local->dst_hashed,
local->dst_hashed->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
return 0;
err:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
int
dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -226,7 +219,7 @@ dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -246,7 +239,7 @@ dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, dht_rename_readdir_cbk,
prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0);
+ local->fd, 4096, 0, NULL);
return 0;
@@ -302,22 +295,54 @@ dht_rename_dir (call_frame_t *frame, xlator_t *this)
STACK_WIND (frame, dht_rename_opendir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd);
+ &local->loc2, local->fd, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
+#define DHT_MARK_FOP_INTERNAL(xattr) do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new (); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str (xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
+ if (tmp) { \
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set" \
+ " internal dict key for %s", local->loc.path); \
+ } \
+ }while (0)
+int
+dht_rename_done (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ local = frame->local;
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent, &local->preparent,
+ &local->postparent, NULL);
+
+ return 0;
+}
int
dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -346,11 +371,7 @@ dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
WIPE (&local->postparent);
if (is_last_call (this_call_cnt)) {
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ dht_rename_done (frame, this);
}
out:
@@ -368,7 +389,7 @@ dht_rename_cleanup (call_frame_t *frame)
xlator_t *dst_hashed = NULL;
xlator_t *dst_cached = NULL;
int call_cnt = 0;
-
+ dict_t *xattr = NULL;
local = frame->local;
this = frame->this;
@@ -392,13 +413,15 @@ dht_rename_cleanup (call_frame_t *frame)
if (!call_cnt)
goto nolinks;
+ DHT_MARK_FOP_INTERNAL (xattr);
+
if (dst_hashed != src_hashed && dst_hashed != src_cached) {
gf_log (this->name, GF_LOG_TRACE,
"unlinking linkfile %s @ %s => %s",
local->loc.path, dst_hashed->name, src_cached->name);
STACK_WIND (frame, dht_rename_unlink_cbk,
dst_hashed, dst_hashed->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr);
}
if (src_cached != dst_hashed) {
@@ -407,9 +430,12 @@ dht_rename_cleanup (call_frame_t *frame)
local->loc2.path, src_cached->name);
STACK_WIND (frame, dht_rename_unlink_cbk,
src_cached, src_cached->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr);
}
+ if (xattr)
+ dict_unref (xattr);
+
return 0;
nolinks:
@@ -422,7 +448,7 @@ nolinks:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
return 0;
}
@@ -430,9 +456,10 @@ nolinks:
int
dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_frame_t *prev = NULL;
dht_local_t *local = NULL;
@@ -446,6 +473,10 @@ dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path, prev->this->name, strerror (op_errno));
}
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
DHT_STACK_DESTROY (frame);
return 0;
@@ -456,7 +487,8 @@ int
dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -467,6 +499,7 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t *rename_subvol = NULL;
call_frame_t *link_frame = NULL;
dht_local_t *link_local = NULL;
+ dict_t *xattr = NULL;
local = frame->local;
prev = cookie;
@@ -476,6 +509,8 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dst_hashed = local->dst_hashed;
dst_cached = local->dst_cached;
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO (frame, dht_local_t);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"%s: rename on %s failed (%s)", local->loc.path,
@@ -505,15 +540,25 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
uuid_copy (link_local->gfid, local->loc.inode->gfid);
dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
- src_cached, dst_hashed, &link_local->loc);
+ this, src_cached, dst_hashed,
+ &link_local->loc);
}
err:
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent, prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent, prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent, prev->this);
+ /* Merge attrs only from src_cached. In case there of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev->this == src_cached) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+ }
+
/* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
* is called. since rename has already happened on rename_subvol,
@@ -538,6 +583,8 @@ err:
if (local->call_cnt == 0)
goto unwind;
+ DHT_MARK_FOP_INTERNAL (xattr);
+
if (src_cached != dst_hashed && src_cached != dst_cached) {
gf_log (this->name, GF_LOG_TRACE,
"deleting old src datafile %s @ %s",
@@ -545,7 +592,7 @@ err:
STACK_WIND (frame, dht_rename_unlink_cbk,
src_cached, src_cached->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr);
}
if (src_hashed != rename_subvol && src_hashed != src_cached) {
@@ -555,7 +602,7 @@ err:
STACK_WIND (frame, dht_rename_unlink_cbk,
src_hashed, src_hashed->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr);
}
if (dst_cached
@@ -567,8 +614,10 @@ err:
STACK_WIND (frame, dht_rename_unlink_cbk,
dst_cached, dst_cached->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr);
}
+ if (xattr)
+ dict_unref (xattr);
return 0;
unwind:
@@ -576,16 +625,16 @@ unwind:
WIPE (&local->postoldparent);
WIPE (&local->preparent);
WIPE (&local->postparent);
+ if (xattr)
+ dict_unref (xattr);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ dht_rename_done (frame, this);
return 0;
cleanup:
+ if (xattr)
+ dict_unref (xattr);
dht_rename_cleanup (frame);
return 0;
@@ -619,9 +668,11 @@ dht_do_rename (call_frame_t *frame)
"renaming %s => %s (%s)",
local->loc.path, local->loc2.path, rename_subvol->name);
+ if (local->linked == _gf_true)
+ FRAME_SU_DO (frame, dht_local_t);
STACK_WIND (frame, dht_rename_cbk,
rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
return 0;
}
@@ -631,7 +682,8 @@ int
dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -646,7 +698,11 @@ dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"link/file on %s failed (%s)",
prev->this->name, strerror (op_errno));
local->op_ret = -1;
- local->op_errno = op_errno;
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ } else if (local->src_cached == prev->this) {
+ /* merge of attr returned only from linkfile creation */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
}
this_call_cnt = dht_frame_return (frame);
@@ -669,7 +725,8 @@ cleanup:
int
dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -711,6 +768,7 @@ dht_rename_create_links (call_frame_t *frame)
xlator_t *dst_hashed = NULL;
xlator_t *dst_cached = NULL;
int call_cnt = 0;
+ dict_t *xattr = NULL;
local = frame->local;
@@ -721,6 +779,7 @@ dht_rename_create_links (call_frame_t *frame)
dst_hashed = local->dst_hashed;
dst_cached = local->dst_cached;
+ DHT_MARK_FOP_INTERNAL (xattr);
if (src_cached == dst_cached) {
if (dst_hashed == dst_cached)
@@ -732,7 +791,7 @@ dht_rename_create_links (call_frame_t *frame)
STACK_WIND (frame, dht_rename_unlink_links_cbk,
dst_hashed, dst_hashed->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr);
return 0;
}
@@ -749,7 +808,7 @@ dht_rename_create_links (call_frame_t *frame)
"linkfile %s @ %s => %s",
local->loc.path, dst_hashed->name, src_cached->name);
memcpy (local->gfid, local->loc.inode->gfid, 16);
- dht_linkfile_create (frame, dht_rename_links_cbk,
+ dht_linkfile_create (frame, dht_rename_links_cbk, this,
src_cached, dst_hashed, &local->loc);
}
@@ -759,7 +818,7 @@ dht_rename_create_links (call_frame_t *frame)
local->loc2.path, src_cached->name);
STACK_WIND (frame, dht_rename_links_cbk,
src_cached, src_cached->fops->link,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, xattr);
}
nolinks:
@@ -767,6 +826,8 @@ nolinks:
/* skip to next step */
dht_do_rename (frame);
}
+ if (xattr)
+ dict_unref (xattr);
return 0;
}
@@ -774,7 +835,7 @@ nolinks:
int
dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_t *src_cached = NULL;
xlator_t *src_hashed = NULL;
@@ -856,7 +917,8 @@ dht_rename (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 3342c35a9..3fe96b1c7 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -26,7 +17,7 @@
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
-
+#include "glusterfs-acl.h"
#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,cnt,path) do { \
layout->list[i].start = srt; \
@@ -38,43 +29,40 @@
layout->list[i].xlator->name, path); \
} while (0)
+#define DHT_RESET_LAYOUT_RANGE(layout) do { \
+ int cnt = 0; \
+ for (cnt = 0; cnt < layout->cnt; cnt++ ) { \
+ layout->list[cnt].start = 0; \
+ layout->list[cnt].stop = 0; \
+ } \
+ } while (0)
-static inline uint32_t
-dht_find_overlap (int idx, int cnk_idx, uint32_t start, uint32_t stop,
- uint32_t chunk_size)
+static uint32_t
+dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
{
- uint32_t overlap = 0;
- uint32_t chunk_begin = 0;
+ if (o >= old->cnt || n >= new->cnt)
+ return 0;
- chunk_begin = cnk_idx * chunk_size;
+ if (old->list[o].err > 0 || new->list[n].err > 0)
+ return 0;
- /* There is no chance of overlap */
- if ((chunk_begin > stop) ||
- ((chunk_begin + chunk_size) < start))
- goto out;
-
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) <= stop)) {
- overlap = ((chunk_begin + chunk_size) - start);
- goto out;
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
}
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - start);
- goto out;
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
}
- if ((chunk_begin < stop) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - chunk_begin);
- goto out;
- }
+ if ((old->list[o].start > new->list[n].stop) ||
+ (old->list[o].stop < new->list[n].start))
+ return 0;
-out:
- return overlap;
+ return min (old->list[o].stop, new->list[n].stop) -
+ max (old->list[o].start, new->list[n].start) + 1;
}
+
int
dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
{
@@ -82,7 +70,7 @@ dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
local = frame->local;
local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno);
+ local->op_errno, NULL);
return 0;
}
@@ -90,7 +78,7 @@ dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
int
dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -129,18 +117,32 @@ dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
{
xlator_t *subvol = NULL;
dict_t *xattr = NULL;
int ret = 0;
xlator_t *this = NULL;
int32_t *disk_layout = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
-
- subvol = layout->list[i].xlator;
+ local = frame->local;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
this = frame->this;
+ GF_VALIDATE_OR_GOTO ("", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, layout, err);
+ GF_VALIDATE_OR_GOTO (this->name, local, err);
+ GF_VALIDATE_OR_GOTO (this->name, subvol, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
xattr = get_new_dict ();
if (!xattr) {
goto err;
@@ -154,8 +156,7 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
goto err;
}
- ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
- disk_layout, 4 * 4);
+ ret = dict_set_bin (xattr, conf->xattr_name, disk_layout, 4 * 4);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"%s: (subvol %s) failed to set xattr dictionary",
@@ -171,9 +172,12 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
dict_ref (xattr);
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
+
STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
subvol, subvol->fops->setxattr,
- loc, xattr, 0);
+ loc, xattr, 0, NULL);
dict_unref (xattr);
@@ -183,11 +187,10 @@ err:
if (xattr)
dict_destroy (xattr);
- if (disk_layout)
- GF_FREE (disk_layout);
+ GF_FREE (disk_layout);
dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM);
+ -1, ENOMEM, NULL);
return 0;
}
@@ -198,21 +201,42 @@ dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
int i = 0;
int count = 0;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
gf_log (this->name, GF_LOG_DEBUG,
"writing the new range for all subvolumes");
- local->call_cnt = count = layout->cnt;
+ local->call_cnt = count = conf->subvolume_cnt;
for (i = 0; i < layout->cnt; i++) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
if (--count == 0)
- break;
+ goto out;
}
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
+ }
+
+ dht_layout_unref (this, dummy);
+out:
return 0;
}
@@ -223,9 +247,12 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
int missing_xattr = 0;
int i = 0;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err != -1 || !layout->list[i].stop) {
@@ -254,18 +281,30 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
if (layout->list[i].err != -1 || !layout->list[i].stop)
continue;
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
if (--missing_xattr == 0)
break;
}
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ }
+ }
+ dht_layout_unref (this, dummy);
+out:
return 0;
}
int
dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -306,6 +345,9 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
return 0;
}
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
+
local->call_cnt = missing_attr;
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == -1) {
@@ -316,7 +358,7 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
}
}
@@ -327,7 +369,8 @@ int
dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -373,6 +416,46 @@ out:
return 0;
}
+void
+dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+{
+ data_t *acl_default = NULL;
+ data_t *acl_access = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ GF_ASSERT (xattr);
+ GF_ASSERT (dict);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ acl_default = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+
+ if (!acl_default) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ACL_DEFAULT xattr not present");
+ goto cont;
+ }
+ ret = dict_set (dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not set ACL_DEFAULT xattr");
+cont:
+ acl_access = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
+ if (!acl_access) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ACL_ACCESS xattr not present");
+ goto out;
+ }
+ ret = dict_set (dict, POSIX_ACL_ACCESS_XATTR, acl_access);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not set ACL_ACCESS xattr");
+
+out:
+ return;
+}
int
dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
@@ -406,16 +489,19 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
if (ret)
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%s: failed to set gfid in dict", loc->path);
} else if (local->params) {
/* Send the dictionary from higher layers directly */
dict = dict_ref (local->params);
}
+ /* Set acls */
+ if (local->xattr && dict)
+ dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
if (!dict)
gf_log (this->name, GF_LOG_WARNING,
- "dict is NULL, need to make sure gfid's are same");
+ "dict is NULL, need to make sure gfids are same");
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == ENOENT || force) {
@@ -429,7 +515,7 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
loc,
st_mode_from_ia (local->stbuf.ia_prot,
local->stbuf.ia_type),
- dict);
+ 0, dict);
}
}
@@ -448,7 +534,7 @@ dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
uint32_t hashval = 0;
int ret = 0;
- ret = dht_hash_compute (layout->type, loc->path, &hashval);
+ ret = dht_hash_compute (this, layout->type, loc->path, &hashval);
if (ret == 0) {
start = (hashval % layout->cnt);
}
@@ -471,7 +557,7 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
for (j = 0; j < conf->subvolume_cnt; j++) {
if (conf->decommissioned_bricks[j] &&
conf->decommissioned_bricks[j] == layout->list[i].xlator) {
- layout->list[i].err = -EINVAL;
+ layout->list[i].err = EINVAL;
break;
}
}
@@ -479,9 +565,33 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
for (i = 0; i < layout->cnt; i++) {
err = layout->list[i].err;
- if (err == -1 || err == 0) {
- layout->list[i].err = -1;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+
+ It is important to note that it is safe
+ to race with mkdir() as self-heal and
+ mkdir are idempotent operations. Both will
+ strive to set the directory and layouts to
+ the same final state.
+ */
count++;
+ if (!err)
+ layout->list[i].err = -1;
}
}
@@ -496,49 +606,126 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
}
}
- count = ((layout->spread_cnt) ? layout->spread_cnt :
- ((count) ? count : 1));
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (down brick and decommissioned bricks are considered
+ * un-availbale). Else return count (available up bricks) */
+ count = ((layout->spread_cnt &&
+ (layout->spread_cnt <= count)) ?
+ layout->spread_cnt : ((count) ? count : 1));
return count;
}
+void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
+
+void dht_layout_entry_swap (dht_layout_t *layout, int i, int j);
+void dht_layout_range_swap (dht_layout_t *layout, int i, int j);
+
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ */
+#define OV_ENTRY(x,y) table[x*new->cnt+y]
+
+void
+dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new, dht_layout_t *old)
+{
+ int i = 0;
+ int j = 0;
+ uint32_t curr_overlap = 0;
+ uint32_t max_overlap = 0;
+ int max_overlap_idx = -1;
+ uint32_t overlap = 0;
+ uint32_t *table = NULL;
+
+ dht_layout_sort_volname (old);
+ /* Now both old_layout->list[] and new_layout->list[]
+ are match the same xlators/subvolumes. i.e,
+ old_layout->[i] and new_layout->[i] are referring
+ to the same subvolumes
+ */
+
+ /* Build a table of overlaps between new[i] and old[j]. */
+ table = alloca(sizeof(overlap)*old->cnt*new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table,0,sizeof(overlap)*old->cnt*new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i);
+ }
+ }
+
+ for (i = 0; i < new->cnt; i++) {
+ if (new->list[i].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ if (new->list[j].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
+ }
+ }
+ }
+
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap (new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i,j);
+ OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j);
+ OV_ENTRY(max_overlap_idx,j) = overlap;
+ }
+ }
+ }
+}
+
+
dht_layout_t *
dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
- uint32_t chunk = 0;
- uint32_t start = 0;
- uint32_t stop = 0;
- uint32_t overlap = 0;
- uint32_t max_overlap = 0;
- uint32_t chunk_begin = 0;
- int count = 0;
- int cnt = 0;
int i = 0;
- int j = 0;
- int k = 0;
- int loop_cnt = 0;
- int start_subvol = 0;
- int *fix_array = NULL;
xlator_t *this = NULL;
dht_layout_t *new_layout = NULL;
dht_conf_t *priv = NULL;
dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ int ret = 0;
this = frame->this;
priv = this->private;
local = frame->local;
- count = cnt = dht_get_layout_count (this, layout, 0);
-
- chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
-
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
-
- fix_array = GF_CALLOC (sizeof (int), layout->cnt, gf_common_mt_char);
- if (!fix_array) {
- /* No fix, use the existing layout itself */
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "leaving %s alone",
+ loc->path);
goto done;
}
@@ -546,98 +733,33 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
if (!new_layout)
goto done;
- for (i = 0; i < new_layout->cnt; i++) {
- /* TODO: fix this in layout_alloc() itself */
- new_layout->list[i].err = -ENOENT;
- if (i < layout->cnt)
- new_layout->list[i].xlator = layout->list[i].xlator;
- }
-
- /* Check if there are any overlap in layout, and give the proper fix */
- for (i = 0; i < layout->cnt; i++) {
- /* No need to fix if 'err' is not '-1' */
- if (layout->list[i].err != -1)
- continue;
-
- /* If already existing layout is having no range, skip it */
- start = layout->list[i].start;
- stop = layout->list[i].stop;
- if ((stop - start) == 0)
- continue;
-
- max_overlap = 0;
+ /* If a subvolume is down, do not re-write the layout. */
+ ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
+ &subvol_down, NULL, NULL);
- /* 'j' is used as starting point of each chunk */
- for (j = 1; j <= count; j++) {
- /* if chunk is already used, don't use it again */
- for (k = 0; k < i; k++)
- if (j == fix_array[k])
- break;
- if (k < i)
- continue;
-
- overlap = dht_find_overlap (i, (j-1), start, stop, chunk);
- if (max_overlap < overlap) {
- max_overlap = overlap;
- fix_array[i] = j;
- }
- }
-
- /* If we have any overlap, then use that itself as new
- layout for the subvolume */
- if (fix_array[i]) {
- chunk_begin = chunk * (fix_array[i] - 1);
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin,
- chunk, cnt, loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (fix_array[i] == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
-
- }
+ if (subvol_down || (ret == -1)) {
+ gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down"
+ ". Skipping fix layout.", subvol_down);
+ GF_FREE (new_layout);
+ return NULL;
}
- /* Now, look for layouts which are not having any overlaps
- and give it a fix */
- for (loop_cnt = 0, i = start_subvol; loop_cnt < new_layout->cnt;
- i++, loop_cnt++) {
- if (i == new_layout->cnt)
- i = 0;
-
- /* If 'fix_array[i]' is set, the layout is already fixed. */
- if (fix_array[i])
- continue;
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
- if (layout->list[i].err != -1) {
- new_layout->list[i].err = layout->list[i].err;
- continue;
- }
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
- for (k = 1; k <= count; k++) {
- for (j = 0; j < new_layout->cnt; j++) {
- if (k == fix_array[j])
- break;
- }
- /* Didn't find any of the list begining with 'k' */
- if (j == new_layout->cnt)
- break;
- }
+ /* First give it a layout as though it is a new directory. This
+ ensures rotation to kick in */
+ dht_layout_sort_volname (new_layout);
+ dht_selfheal_layout_new_directory (frame, loc, new_layout);
- fix_array[i] = k;
- chunk_begin = (k - 1) * chunk;
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin, chunk, cnt,
- loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (k == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
- }
+ /* Now selectively re-assign ranges only when it helps */
+ dht_selfheal_layout_maximize_overlap (frame, loc, new_layout, layout);
done:
if (new_layout) {
@@ -651,7 +773,7 @@ done:
local->layout = new_layout;
}
- return new_layout;
+ return local->layout;
}
@@ -675,9 +797,11 @@ dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
+ /* clear out the range, as we are re-computing here */
+ DHT_RESET_LAYOUT_RANGE (layout);
for (i = start_subvol; i < layout->cnt; i++) {
err = layout->list[i].err;
- if (err == -1) {
+ if (err == -1 || err == ENOENT) {
DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
cnt, loc->path);
if (--cnt == 0) {
@@ -690,7 +814,7 @@ dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
for (i = 0; i < start_subvol; i++) {
err = layout->list[i].err;
- if (err == -1) {
+ if (err == -1 || err == ENOENT) {
DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
cnt, loc->path);
if (--cnt == 0) {
@@ -709,35 +833,17 @@ int
dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
dht_local_t *local = NULL;
- int missing = -1;
- int down = -1;
- int holes = -1;
+ uint32_t holes = 0;
int ret = -1;
int i = -1;
- int overlaps = -1;
+ uint32_t overlaps = 0;
- this = frame->this;
- conf = this->private;
local = frame->local;
- missing = local->selfheal.missing;
- down = local->selfheal.down;
holes = local->selfheal.hole_cnt;
overlaps = local->selfheal.overlaps_cnt;
- if ((missing + down) == conf->subvolume_cnt) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
-
- if (holes <= down) {
- /* the down subvol might fill up the holes */
- ret = 0;
- }
-
if (holes || overlaps) {
dht_selfheal_layout_new_directory (frame, loc, layout);
ret = 0;
@@ -789,6 +895,9 @@ dht_fix_directory_layout (call_frame_t *frame,
/* No layout sorting required here */
tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
return 0;
@@ -811,9 +920,8 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
dht_layout_anomalies (this, loc, layout,
&local->selfheal.hole_cnt,
&local->selfheal.overlaps_cnt,
- &local->selfheal.missing,
- &local->selfheal.down,
- &local->selfheal.misc);
+ NULL, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
down = local->selfheal.down;
misc = local->selfheal.misc;
@@ -822,14 +930,14 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
local->selfheal.layout = dht_layout_ref (this, layout);
if (down) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%d subvolumes down -- not fixing", down);
ret = 0;
goto sorry_no_fix;
}
if (misc) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%d subvolumes have unrecoverable errors", misc);
ret = 0;
goto sorry_no_fix;
@@ -839,7 +947,7 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
ret = dht_selfheal_dir_getafix (frame, loc, layout);
if (ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"not able to form layout for the directory");
goto sorry_no_fix;
}
@@ -872,3 +980,50 @@ dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
return ret;
}
+
+int
+dht_dir_attr_heal (void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO ("dht", conf, out);
+
+ call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (!subvol || (subvol == dht_first_up_subvol (this)))
+ continue;
+ ret = syncop_setattr (subvol, &local->loc, &local->stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret)
+ gf_log ("dht", GF_LOG_ERROR, "Failed to set uid/gid on"
+ " %s on %s subvol (%s)", local->loc.path,
+ subvol->name, strerror (errno));
+ }
+out:
+ return 0;
+}
+
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY (sync_frame);
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
new file mode 100644
index 000000000..70aac7710
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -0,0 +1,758 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "statedump.h"
+#include "dht-common.h"
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+struct volume_options options[];
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix)
+{
+
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ if (!layout)
+ goto out;
+ if (!prefix)
+ goto out;
+
+ gf_proc_dump_build_key(key, prefix, "cnt");
+ gf_proc_dump_write(key, "%d", layout->cnt);
+ gf_proc_dump_build_key(key, prefix, "preset");
+ gf_proc_dump_write(key, "%d", layout->preset);
+ gf_proc_dump_build_key(key, prefix, "gen");
+ gf_proc_dump_write(key, "%d", layout->gen);
+ if (layout->type != IA_INVAL) {
+ gf_proc_dump_build_key(key, prefix, "inode type");
+ gf_proc_dump_write(key, "%d", layout->type);
+ }
+
+ if (!IA_ISDIR (layout->type))
+ goto out;
+
+ for (i = 0; i < layout->cnt; i++) {
+ gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
+ gf_proc_dump_write(key, "%d", layout->list[i].err);
+ gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].start);
+ gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].stop);
+ if (layout->list[i].xlator) {
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.type", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->type);
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.name", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->name);
+ }
+ }
+
+out:
+ return;
+}
+
+
+int32_t
+dht_priv_dump (xlator_t *this)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->subvolume_lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
+ this->name);
+ gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ sprintf (key, "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+ conf->subvolumes[i]->name);
+ if (conf->file_layouts && conf->file_layouts[i]){
+ sprintf (key, "file_layouts[%d]", i);
+ dht_layout_dump(conf->file_layouts[i], key);
+ }
+ if (conf->dir_layouts && conf->dir_layouts[i]) {
+ sprintf (key, "dir_layouts[%d]", i);
+ dht_layout_dump(conf->dir_layouts[i], key);
+ }
+ if (conf->subvolume_status) {
+
+ sprintf (key, "subvolume_status[%d]", i);
+ gf_proc_dump_write(key, "%d",
+ (int)conf->subvolume_status[i]);
+ }
+
+ }
+
+ gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+ gf_proc_dump_write("gen", "%d", conf->gen);
+ gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+ gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+ gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
+ if (conf ->du_stats) {
+ gf_proc_dump_write("du_stats.avail_percent", "%lf",
+ conf->du_stats->avail_percent);
+ gf_proc_dump_write("du_stats.avail_space", "%lu",
+ conf->du_stats->avail_space);
+ gf_proc_dump_write("du_stats.avail_inodes", "%lf",
+ conf->du_stats->avail_inodes);
+ gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);
+ }
+
+ if (conf->last_stat_fetch.tv_sec)
+ gf_proc_dump_write("last_stat_fetch", "%s",
+ ctime(&conf->last_stat_fetch.tv_sec));
+
+ UNLOCK(&conf->subvolume_lock);
+
+out:
+ return ret;
+}
+
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+
+ if (!this)
+ goto out;
+ if (!inode)
+ goto out;
+
+ ret = dht_inode_ctx_layout_get (inode, this, &layout);
+
+ if ((ret != 0) || !layout)
+ return ret;
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
+ dht_layout_dump(layout, "layout");
+
+out:
+ return ret;
+}
+
+void
+dht_fini (xlator_t *this)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ conf = this->private;
+ this->private = NULL;
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf);
+ }
+out:
+ return;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+
+
+int
+dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
+{
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ dup_brick = gf_strdup (bricks);
+ node = strtok_r (dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp (conf->subvolumes[i]->name, node)) {
+ conf->decommissioned_bricks[i] =
+ conf->subvolumes[i];
+ conf->decommission_subvols_cnt++;
+ gf_log (this->name, GF_LOG_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r (NULL, ",", &tmpstr);
+ }
+
+ ret = 0;
+ conf->decommission_in_progress = 1;
+out:
+ GF_FREE (dup_brick);
+
+ return ret;
+}
+
+
+int
+dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
+{
+ int i = 0;
+ int ret = -1;
+
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i]) {
+ conf->decommissioned_bricks[i] = NULL;
+ conf->decommission_subvols_cnt--;
+ }
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+void
+dht_init_regex (xlator_t *this, dict_t *odict, char *name,
+ regex_t *re, gf_boolean_t *re_valid)
+{
+ char *temp_str;
+
+ if (dict_get_str (odict, name, &temp_str) != 0) {
+ if (strcmp(name,"rsync-hash-regex")) {
+ return;
+ }
+ temp_str = "^\\.(.+)\\.[^.]+$";
+ }
+
+ if (*re_valid) {
+ regfree(re);
+ *re_valid = _gf_false;
+ }
+
+ if (!strcmp(temp_str,"none")) {
+ return;
+ }
+
+ if (regcomp(re,temp_str,REG_EXTENDED) == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "using regex %s = %s", name, temp_str);
+ *re_valid = _gf_true;
+ }
+ else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "compiling regex %s failed", temp_str);
+ }
+}
+
+int
+dht_reconfigure (xlator_t *this, dict_t *options)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ gf_boolean_t search_unhashed;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", options, out);
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean*/
+ if (strcasecmp (temp_str, "auto")) {
+ if (!gf_string2boolean (temp_str, &search_unhashed)) {
+ gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+ " lookup-unhashed reconfigured (%s)",
+ temp_str);
+ conf->search_unhashed = search_unhashed;
+ } else {
+ gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
+ " lookup-unhashed should be boolean,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->search_unhashed);
+ //return -1;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+ " lookup-unhashed reconfigured auto ");
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
+ percent_or_size, out);
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100.0)
+ conf->disk_unit = 'p';
+
+ GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
+ percent, out);
+
+ GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
+ bool, out);
+ if (conf->defrag) {
+ GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
+ options, bool, out);
+ }
+
+ if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ } else {
+ ret = dht_decommissioned_remove (this, conf);
+ if (ret == -1)
+ goto out;
+ }
+
+ dht_init_regex (this, options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *num = NULL;
+ char *pattern_str = NULL;
+ char *pattern = NULL;
+ gf_defrag_pattern_list_t *temp_list = NULL;
+ gf_defrag_pattern_list_t *pattern_list = NULL;
+
+ if (!this || !defrag || !data)
+ goto out;
+
+ /* Get the pattern for pattern list. "pattern:<optional-size>"
+ * eg: *avi, *pdf:10MB, *:1TB
+ */
+ pattern_str = strtok_r (data, ",", &tmp_str);
+ while (pattern_str) {
+ dup_str = gf_strdup (pattern_str);
+ pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t),
+ 1);
+ if (!pattern_list) {
+ goto out;
+ }
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!pattern)
+ goto out;
+ if (!num) {
+ if (gf_string2bytesize(pattern, &pattern_list->size)
+ == 0) {
+ pattern = "*";
+ }
+ } else if (gf_string2bytesize (num, &pattern_list->size) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+ memcpy (pattern_list->path_pattern, pattern, strlen (dup_str));
+
+ if (!defrag->defrag_pattern)
+ temp_list = NULL;
+ else
+ temp_list = defrag->defrag_pattern;
+
+ pattern_list->next = temp_list;
+
+ defrag->defrag_pattern = pattern_list;
+ pattern_list = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ pattern_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE (pattern_list);
+ GF_FREE (dup_str);
+
+ return ret;
+}
+
+int
+dht_init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+ char *node_uuid = NULL;
+
+
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Distribute needs more than one subvolume");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
+ if (!conf) {
+ goto err;
+ }
+
+ ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
+ gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+
+ LOCK_INIT (&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ conf->defrag = defrag;
+
+ ret = dict_get_str (this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "node-uuid not "
+ "specified");
+ goto err;
+ }
+
+ if (uuid_parse (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse "
+ "glusterd node uuid");
+ goto err;
+ }
+
+ defrag->cmd = cmd;
+
+ defrag->stats = _gf_false;
+ }
+
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+ if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean */
+ if (strcasecmp (temp_str, "auto"))
+ gf_string2boolean (temp_str, &conf->search_unhashed);
+ else
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+
+ GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
+ err);
+
+ GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
+
+ GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
+ err);
+
+ GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
+ err);
+
+ conf->dir_spread_cnt = conf->subvolume_cnt;
+ GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
+ uint32, err);
+
+ GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
+ bool, err);
+
+ GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
+
+ if (defrag) {
+ GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
+ if (dict_get_str (this->options, "rebalance-filter", &temp_str)
+ == 0) {
+ if (gf_defrag_pattern_list_fill (this, defrag, temp_str)
+ == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse"
+ " rebalance-filter (%s)", temp_str);
+ goto err;
+ }
+ }
+ }
+
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100)
+ conf->disk_unit = 'p';
+
+ ret = dht_init_subvolumes (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+
+ dht_init_regex (this, this->options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, this->options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = dht_layouts_init (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ LOCK_INIT (&conf->subvolume_lock);
+ LOCK_INIT (&conf->layout_lock);
+
+ conf->gen = 1;
+
+ this->local_pool = mem_pool_new (dht_local_t, 512);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
+ gf_asprintf (&conf->link_xattr_name, "%s.linkto", conf->xattr_name);
+ gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name);
+ if (!conf->link_xattr_name || !conf->wild_xattr_name) {
+ goto err;
+ }
+
+ this->private = conf;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf->du_stats);
+
+ GF_FREE (conf->defrag);
+
+ GF_FREE (conf->xattr_name);
+ GF_FREE (conf->link_xattr_name);
+ GF_FREE (conf->wild_xattr_name);
+
+ GF_FREE (conf);
+ }
+
+ return -1;
+}
+
+
+struct volume_options options[] = {
+ { .key = {"lookup-unhashed"},
+ .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
+ "on", "off"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description = "This option if set to ON, does a lookup through "
+ "all the sub-volumes, in case a lookup didn't return any result "
+ "from the hash subvolume. If set to OFF, it does not do a lookup "
+ "on the remaining subvolumes."
+ },
+ { .key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .default_value = "10%",
+ .description = "Percentage/Size of disk space, after which the "
+ "process starts balancing out the cluster, and logs will appear "
+ "in log files",
+ },
+ { .key = {"min-free-inodes"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "5%",
+ .description = "after system has only N% of inodes, warnings "
+ "starts to appear in log files",
+ },
+ { .key = {"unhashed-sticky-bit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option if set to ON, forces the use of "
+ "readdirp, and hence also displays the stats of the files."
+ },
+ { .key = {"assert-no-child-down"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON, in the event of "
+ "CHILD_DOWN, will call exit."
+ },
+ { .key = {"directory-layout-spread"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies the directory layout spread."
+ },
+ { .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description = "This option if set to ON, decommissions "
+ "the brick, so that no new data is allowed to be created "
+ "on that brick."
+ },
+ { .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = {"rebalance-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON displays and logs the "
+ " time taken for migration of each file, during the rebalance "
+ "process. If set to OFF, the rebalance logs will only display the "
+ "time spent in each directory."
+ },
+ { .key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON enables the optimization "
+ "that allows DHT to requests non-first subvolumes to filter out "
+ "directory entries."
+ },
+ { .key = {"rsync-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by rsync, to prevent relocation when the "
+ "file is renamed."
+ },
+ { .key = {"extra-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by an application, to prevent relocation when "
+ "the file is renamed."
+ },
+ { .key = {"rebalance-filter"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"xattr-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "trusted.glusterfs.dht",
+ .description = "Base for extended attributes used by this "
+ "translator instance, to avoid conflicts with others above or "
+ "below it."
+ },
+
+ /* NUFA option */
+ { .key = {"local-volume-name"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+
+ /* switch option */
+ { .key = {"pattern.switch.case"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 8be573f51..fc0ca2f77 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,434 +14,15 @@
#include "config.h"
#endif
-/* TODO: add NS locking */
-
#include "statedump.h"
#include "dht-common.h"
-/* TODO:
- - use volumename in xattr instead of "dht"
- - use NS locks
- - handle all cases in self heal layout reconstruction
- - complete linkfile selfheal
-*/
-struct volume_options options[];
-
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix)
-{
-
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", layout, out);
- GF_VALIDATE_OR_GOTO ("dht", prefix, out);
-
- gf_proc_dump_build_key(key, prefix, "cnt");
- gf_proc_dump_write(key, "%d", layout->cnt);
- gf_proc_dump_build_key(key, prefix, "preset");
- gf_proc_dump_write(key, "%d", layout->preset);
- gf_proc_dump_build_key(key, prefix, "gen");
- gf_proc_dump_write(key, "%d", layout->gen);
- gf_proc_dump_build_key(key, prefix, "type");
- gf_proc_dump_write(key, "%d", layout->type);
-
- for (i = 0; i < layout->cnt; i++) {
- gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
- gf_proc_dump_write(key, "%d", layout->list[i].err);
- gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
- gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
- if (layout->list[i].xlator) {
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.type", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->type);
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.name", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->name);
- }
- }
-
-out:
- return;
-}
-
-
-int32_t
-dht_priv_dump (xlator_t *this)
-{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
-
- if (!conf)
- return -1;
-
- ret = TRY_LOCK(&conf->subvolume_lock);
-
- if (ret != 0) {
- gf_log("", GF_LOG_WARNING, "Unable to lock dht subvolume %s",
- this->name);
- return ret;
- }
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
- this->name);
- gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- sprintf (key, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
- conf->subvolumes[i]->name);
- if (conf->file_layouts && conf->file_layouts[i]){
- sprintf (key, "file_layouts[%d]", i);
- dht_layout_dump(conf->file_layouts[i], key);
- }
- if (conf->dir_layouts && conf->dir_layouts[i]) {
- sprintf (key, "dir_layouts[%d]", i);
- dht_layout_dump(conf->dir_layouts[i], key);
- }
- if (conf->subvolume_status) {
-
- sprintf (key, "subvolume_status[%d]", i);
- gf_proc_dump_write(key, "%d",
- (int)conf->subvolume_status[i]);
- }
-
- }
-
- gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
- gf_proc_dump_write("gen", "%d", conf->gen);
- gf_proc_dump_write("min_free_disk", "%lu", conf->min_free_disk);
- gf_proc_dump_write("min_free_inodes", "%lu", conf->min_free_inodes);
- gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
- gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
- gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
- if (conf ->du_stats) {
- gf_proc_dump_write("du_stats.avail_percent", "%lf",
- conf->du_stats->avail_percent);
- gf_proc_dump_write("du_stats.avail_space", "%lu",
- conf->du_stats->avail_space);
- gf_proc_dump_write("du_stats.avail_inodes", "%lf",
- conf->du_stats->avail_inodes);
- gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);
- }
- gf_proc_dump_write("last_stat_fetch", "%s", ctime(&conf->last_stat_fetch.tv_sec));
-
- UNLOCK(&conf->subvolume_lock);
-
-out:
- return ret;
-}
-
-int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- dht_layout_t *layout = NULL;
- uint64_t tmp_layout = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", inode, out);
-
- ret = inode_ctx_get (inode, this, &tmp_layout);
-
- if (ret != 0)
- return ret;
-
- layout = (dht_layout_t *)(long)tmp_layout;
-
- if (!layout)
- return -1;
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
- dht_layout_dump(layout, "layout");
-
-out:
- return ret;
-}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- ret = dht_notify (this, event, data);
-
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
- this->private = NULL;
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
- }
-out:
- return;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-
-int
-dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
- const char *bricks)
-{
- int i = 0;
- int ret = -1;
- char *tmpstr = NULL;
- char *dup_brick = NULL;
- char *node = NULL;
-
- if (!conf || !bricks)
- goto out;
-
- dup_brick = gf_strdup (bricks);
- node = strtok_r (dup_brick, ",", &tmpstr);
- while (node) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!strcmp (conf->subvolumes[i]->name, node)) {
- conf->decommissioned_bricks[i] =
- conf->subvolumes[i];
- gf_log (this->name, GF_LOG_INFO,
- "decommissioning subvolume %s",
- conf->subvolumes[i]->name);
- break;
- }
- }
- if (i == conf->subvolume_cnt) {
- /* Wrong node given. */
- goto out;
- }
- node = strtok_r (NULL, ",", &tmpstr);
- }
-
- ret = 0;
-out:
- if (dup_brick)
- GF_FREE (dup_brick);
-
- return ret;
-}
-
-int
-reconfigure (xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- gf_boolean_t search_unhashed;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", options, out);
-
- conf = this->private;
- if (!conf)
- return 0;
-
- if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean*/
- if (strcasecmp (temp_str, "auto")) {
- if (!gf_string2boolean (temp_str, &search_unhashed)) {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unhashed reconfigured (%s)",
- temp_str);
- conf->search_unhashed = search_unhashed;
- } else {
- gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
- " lookup-unhashed should be boolean,"
- " not (%s), defaulting to (%d)",
- temp_str, conf->search_unhashed);
- //return -1;
- ret = -1;
- goto out;
- }
- } else {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unhashed reconfigured auto ");
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
- }
-
- GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
- percent_or_size, out);
- GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
- percent, out);
- GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
- options, uint32, out);
-
- if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, err);
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
- err);
-
- GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
-
- GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
- err);
-
- GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
- err);
-
- conf->dir_spread_cnt = conf->subvolume_cnt;
- GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
- uint32, err);
-
- GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
- bool, err);
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
- }
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
- return -1;
-}
-
+class_methods_t class_methods = {
+ .init = dht_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
.lookup = dht_lookup,
@@ -478,6 +50,7 @@ struct xlator_fops fops = {
.access = dht_access,
.readlink = dht_readlink,
.getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
.readv = dht_readv,
.flush = dht_flush,
.fsync = dht_fsync,
@@ -486,6 +59,7 @@ struct xlator_fops fops = {
.lk = dht_lk,
/* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
.removexattr = dht_removexattr,
.setxattr = dht_setxattr,
.fsetxattr = dht_fsetxattr,
@@ -496,6 +70,9 @@ struct xlator_fops fops = {
.fxattrop = dht_fxattrop,
.setattr = dht_setattr,
.fsetattr = dht_fsetattr,
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
struct xlator_dumpops dumpops = {
@@ -509,44 +86,4 @@ struct xlator_cbks cbks = {
// .releasedir = dht_releasedir,
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "on",
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- .default_value = "10%",
- .description = "Percentage/Size of disk space that must be "
- "kept free."
- },
- { .key = {"min-free-inodes"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "5%",
- .description = "Percentage inodes that must be "
- "kept free."
- },
- { .key = {"unhashed-sticky-bit"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"use-readdirp"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"assert-no-child-down"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"directory-layout-spread"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"decommissioned-bricks"},
- .type = GF_OPTION_TYPE_ANY,
- },
- { .key = {NULL} },
-};
+;
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 2f196951a..e934acdf0 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -27,6 +18,8 @@
/* TODO: all 'TODO's in dht.c holds good */
+extern struct volume_options options[];
+
int
nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
@@ -44,7 +37,6 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int call_cnt = 0;
int ret = 0;
-
conf = this->private;
prev = cookie;
@@ -62,7 +54,8 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
@@ -141,7 +134,7 @@ out:
err:
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, NULL);
+ inode, stbuf, xattr, postparent);
return 0;
}
@@ -211,7 +204,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
* revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -232,7 +225,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
} else {
do_fresh_lookup:
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -241,7 +234,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
}
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ conf->link_xattr_name, 256);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -260,7 +253,8 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -269,7 +263,7 @@ nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
@@ -280,21 +274,21 @@ nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd,
- local->params);
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
return 0;
err:
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
nufa_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -329,7 +323,8 @@ nufa_create (call_frame_t *frame, xlator_t *this,
if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
avail_subvol =
dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
+ (xlator_t *)conf->private,
+ local);
}
if (subvol != avail_subvol) {
@@ -337,11 +332,10 @@ nufa_create (call_frame_t *frame, xlator_t *this,
local->params = dict_ref (params);
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- nufa_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
@@ -350,14 +344,14 @@ nufa_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -366,34 +360,39 @@ int
nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
local->cached_subvol->fops->mknod,
&local->loc, local->mode, local->rdev,
- local->params);
+ local->umask, local->params);
return 0;
}
-
+err:
WIPE (postparent);
WIPE (preparent);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent, xdata);
return 0;
}
int
nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -429,7 +428,8 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
avail_subvol =
dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
+ (xlator_t *)conf->private,
+ local);
}
if (avail_subvol != subvol) {
@@ -437,10 +437,11 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
local->params = dict_ref (params);
local->mode = mode;
+ local->umask = umask;
local->rdev = rdev;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
+ dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, this,
avail_subvol, subvol, loc);
return 0;
}
@@ -448,211 +449,185 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
+gf_boolean_t
+same_first_part (char *str1, char term1, char *str2, char term2)
{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
+ gf_boolean_t ended1;
+ gf_boolean_t ended2;
+
+ for (;;) {
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) {
+ return _gf_true;
+ }
+ if (ended1 || ended2 || (*str1 != *str2)) {
+ return _gf_false;
+ }
+ ++str1;
+ ++str2;
+ }
}
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
+typedef struct nufa_args {
+ xlator_t *this;
+ char *volname;
+ gf_boolean_t addr_match;
+} nufa_args_t;
- conf = this->private;
+static void
+nufa_find_local_brick (xlator_t *xl, void *data)
+{
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL;
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /*This means a local subvol was already found. We pick the first brick
+ * that is local*/
+ if (conf->private)
+ return;
+
+ if (strcmp (xl->name, local_volname) == 0) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using specified subvol %s",
+ local_volname);
+ return;
+ }
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
+ if (!addr_match)
+ return;
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
+ ret = dict_get_str (xl->options, "remote-host", &brick_host);
+ if ((ret == 0) &&
+ (gf_is_same_address (local_volname, brick_host) ||
+ gf_is_local_addr (brick_host))) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using the first local "
+ "subvol %s", xl->name);
+ return;
+ }
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
+}
- GF_FREE (conf);
- }
+static void
+nufa_to_dht (xlator_t *this)
+{
+ GF_ASSERT (this);
+ GF_ASSERT (this->fops);
- return;
+ this->fops->lookup = dht_lookup;
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
}
int
-init (xlator_t *this)
+nufa_find_local_subvol (xlator_t *this,
+ void (*fn) (xlator_t *each, void* data), void *data)
{
- dht_conf_t *conf = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- char *local_volname = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- char my_hostname[256];
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "NUFA needs more than one subvolume");
+ int ret = -1;
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL;
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL;
+
+ xlator_foreach_depth_first (this, fn, data);
+ if (!conf->private) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't find a local "
+ "brick");
return -1;
}
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf),
- gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
+ candidate = conf->private;
+ trav = candidate->parents;
+ while (trav) {
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
+ parent = trav->xlator;
+ if (strcmp (parent->type, "cluster/nufa") == 0) {
+ gf_log (this->name, GF_LOG_INFO, "Found local subvol, "
+ "%s", candidate->name);
+ ret = 0;
+ conf->private = candidate;
+ break;
+ }
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
+ candidate = parent;
+ trav = parent->parents;
}
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
+ return ret;
+}
- conf->gen = 1;
+int
+nufa_init (xlator_t *this)
+{
+ data_t *data = NULL;
+ char *local_volname = NULL;
+ int ret = -1;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false;
+ nufa_args_t args = {0, };
- local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
}
- if (ret == 0)
- local_volname = my_hostname;
-
- data = dict_get (this->options, "local-volume-name");
- if (data) {
+ if ((data = dict_get (this->options, "local-volume-name"))) {
local_volname = data->data;
- }
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, local_volname) == 0)
- break;
- trav = trav->next;
- }
+ } else {
+ addr_match = _gf_true;
+ local_volname = "localhost";
+ ret = gethostname (my_hostname, 256);
+ if (ret == 0)
+ local_volname = my_hostname;
- if (!trav) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not find subvolume named '%s'. "
- "Please define volume with the name as the hostname "
- "or override it with 'option local-volume-name'",
- local_volname);
- goto err;
- }
- /* The volume specified exists */
- conf->private = trav->xlator;
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
+ else
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hostname (%s)",
+ strerror (errno));
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
}
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol (this, nufa_find_local_brick, &args);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht (this);
}
-
- this->private = conf;
-
return 0;
+}
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
- return -1;
-}
+class_methods_t class_methods = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
@@ -699,19 +674,3 @@ struct xlator_fops fops = {
struct xlator_cbks cbks = {
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index fd3f22ea0..d3ea90ba8 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -31,6 +22,8 @@
#include <fnmatch.h>
#include <string.h>
+extern struct volume_options options[];
+
struct switch_sched_array {
xlator_t *xl;
int32_t eligible;
@@ -76,29 +69,37 @@ get_switch_matching_subvol (const char *path, dht_conf_t *conf,
struct switch_struct *cond = NULL;
struct switch_struct *trav = NULL;
char *pathname = NULL;
- int idx = 0;
+ int idx = 0;
+ xlator_t *subvol = NULL;
cond = conf->private;
+ subvol = hashed_subvol;
if (!cond)
- return hashed_subvol;
+ goto out;
- trav = cond;
pathname = gf_strdup (path);
+ if (!pathname)
+ goto out;
+
+ trav = cond;
while (trav) {
if (fnmatch (trav->path_pattern,
pathname, FNM_NOESCAPE) == 0) {
for (idx = 0; idx < trav->num_child; idx++) {
if (trav->array[idx].xl == hashed_subvol)
- return hashed_subvol;
+ goto out;
}
idx = trav->node_index++;
trav->node_index %= trav->num_child;
- return trav->array[idx].xl;
+ subvol = trav->array[idx].xl;
+ goto out;
}
trav = trav->next;
}
+out:
GF_FREE (pathname);
- return hashed_subvol;
+
+ return subvol;
}
@@ -136,7 +137,8 @@ switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
@@ -290,11 +292,11 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
* attribute, revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht");
+ "failed to set dict value for %s",
+ conf->xattr_name);
for (i = 0; i < layout->cnt; i++) {
subvol = layout->list[i].xlator;
@@ -309,18 +311,18 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
} else {
do_fresh_lookup:
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht");
+ "failed to set dict value for %s",
+ conf->xattr_name);
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ conf->link_xattr_name, 256);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht.linkto");
+ "failed to set dict value for %s",
+ conf->link_xattr_name);
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -366,7 +368,8 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
return 0;
}
@@ -375,7 +378,7 @@ switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
@@ -386,21 +389,21 @@ switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd,
- local->params);
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
return 0;
err:
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
switch_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -434,18 +437,18 @@ switch_create (call_frame_t *frame, xlator_t *this,
avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
if (dht_is_subvol_filled (this, avail_subvol)) {
avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
}
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- switch_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, switch_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
@@ -454,14 +457,14 @@ switch_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -470,31 +473,36 @@ int
switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
local->cached_subvol->fops->mknod,
&local->loc, local->mode, local->rdev,
- local->params);
+ local->umask, local->params);
return 0;
}
-
+err:
DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent, xdata);
return 0;
}
int
-switch_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -529,7 +537,8 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
if (dht_is_subvol_filled (this, avail_subvol)) {
avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
}
if (avail_subvol != subvol) {
@@ -537,46 +546,36 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
local->params = dict_ref (params);
local->mode = mode;
+ local->umask = umask;
local->rdev = rdev;
local->cached_subvol = avail_subvol;
dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
- avail_subvol, subvol, loc);
+ this, avail_subvol, subvol, loc);
return 0;
}
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
void
-fini (xlator_t *this)
+switch_fini (xlator_t *this)
{
- int i = 0;
dht_conf_t *conf = NULL;
struct switch_struct *trav = NULL;
struct switch_struct *prev = NULL;
@@ -587,30 +586,14 @@ fini (xlator_t *this)
trav = (struct switch_struct *)conf->private;
conf->private = NULL;
while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
+ GF_FREE (trav->array);
prev = trav;
trav = trav->next;
GF_FREE (prev);
}
-
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
}
- return;
+ dht_fini(this);
}
int
@@ -670,8 +653,10 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
dup_str = gf_strdup (switch_str);
switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
gf_switch_mt_switch_struct);
- if (!switch_opt)
+ if (!switch_opt) {
+ GF_FREE (dup_str);
goto err;
+ }
pattern = strtok_r (dup_str, ":", &tmp_str1);
childs = strtok_r (NULL, ":", &tmp_str1);
@@ -681,6 +666,7 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
"for all the unconfigured child nodes,"
" hence neglecting current option");
switch_str = strtok_r (NULL, ";", &tmp_str);
+ GF_FREE (switch_opt);
GF_FREE (dup_str);
continue;
}
@@ -753,6 +739,7 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
/* First entry */
switch_buf = switch_opt;
}
+ switch_opt = NULL;
switch_str = strtok_r (NULL, ";", &tmp_str);
}
@@ -809,19 +796,20 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
/* First entry */
switch_buf = switch_opt;
}
+ switch_opt = NULL;
}
/* */
conf->private = switch_buf;
return 0;
err:
+ GF_FREE (switch_buf_array);
+ GF_FREE (switch_opt);
+
if (switch_buf) {
- if (switch_buf_array)
- GF_FREE (switch_buf_array);
trav = switch_buf;
while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
+ GF_FREE (trav->array);
switch_opt = trav;
trav = trav->next;
GF_FREE (switch_opt);
@@ -831,68 +819,18 @@ err:
}
-int
-init (xlator_t *this)
+int32_t
+switch_init (xlator_t *this)
{
dht_conf_t *conf = NULL;
data_t *data = NULL;
- char *temp_str = NULL;
int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "SWITCH needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_switch_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
- conf->unhashed_sticky_bit = 0;
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
}
+ conf = this->private;
data = dict_get (this->options, "pattern.switch.case");
if (data) {
@@ -903,65 +841,23 @@ init (xlator_t *this)
}
}
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_switch_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
- }
-
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
- }
-
this->private = conf;
-
return 0;
err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
+ dht_fini(this);
return -1;
}
+class_methods_t class_methods = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
+
+
struct xlator_fops fops = {
.lookup = switch_lookup,
.create = switch_create,
@@ -1006,19 +902,3 @@ struct xlator_fops fops = {
struct xlator_cbks cbks = {
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"pattern.switch.case"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
index 5f78a2965..5c1364b7f 100644
--- a/xlators/cluster/ha/src/Makefile.am
+++ b/xlators/cluster/ha/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = ha.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-ha_la_LDFLAGS = -module -avoidversion
+ha_la_LDFLAGS = -module -avoid-version
ha_la_SOURCES = ha-helpers.c ha.c
ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = ha.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
index 1e4af1b62..19be1ed27 100644
--- a/xlators/cluster/ha/src/ha-helpers.c
+++ b/xlators/cluster/ha/src/ha-helpers.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "call-stub.h"
#include "defaults.h"
diff --git a/xlators/cluster/ha/src/ha-mem-types.h b/xlators/cluster/ha/src/ha-mem-types.h
index 9bfb3972b..e5e97d237 100644
--- a/xlators/cluster/ha/src/ha-mem-types.h
+++ b/xlators/cluster/ha/src/ha-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __HA_MEM_TYPES_H__
#define __HA_MEM_TYPES_H__
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
index 38d4229d3..3eccb516b 100644
--- a/xlators/cluster/ha/src/ha.c
+++ b/xlators/cluster/ha/src/ha.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* generate errors randomly, code is simple now, better alogorithm
* can be written to decide what error to be returned and when
*/
@@ -1876,13 +1866,9 @@ err:
}
if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- }
+ GF_FREE (hafdp->fdstate);
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- }
+ GF_FREE (hafdp->path);
GF_FREE (hafdp);
}
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
index 39b6851e7..e2ed7eaa6 100644
--- a/xlators/cluster/ha/src/ha.h
+++ b/xlators/cluster/ha/src/ha.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __HA_H_
#define __HA_H_
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
index 26e19137a..a278b05e2 100644
--- a/xlators/cluster/map/src/Makefile.am
+++ b/xlators/cluster/map/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = map.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-map_la_LDFLAGS = -module -avoidversion
+map_la_LDFLAGS = -module -avoid-version
map_la_SOURCES = map.c map-helper.c
map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = map.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
index 81212fcfd..851397b68 100644
--- a/xlators/cluster/map/src/map-helper.c
+++ b/xlators/cluster/map/src/map-helper.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2009-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/cluster/map/src/map-mem-types.h b/xlators/cluster/map/src/map-mem-types.h
index 669b93dc2..3e89f4736 100644
--- a/xlators/cluster/map/src/map-mem-types.h
+++ b/xlators/cluster/map/src/map-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_MEM_TYPES_H__
#define __MAP_MEM_TYPES_H__
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
index ead9da0b9..6150a33ce 100644
--- a/xlators/cluster/map/src/map.c
+++ b/xlators/cluster/map/src/map.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -2375,8 +2365,7 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
- if (priv->xlarray)
- GF_FREE (priv->xlarray);
+ GF_FREE (priv->xlarray);
trav_map = priv->map;
while (trav_map) {
diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h
index bccac437c..7703a543e 100644
--- a/xlators/cluster/map/src/map.h
+++ b/xlators/cluster/map/src/map.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_H__
#define __MAP_H__
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
index 0db3c9eeb..2d151422a 100644
--- a/xlators/cluster/stripe/src/Makefile.am
+++ b/xlators/cluster/stripe/src/Makefile.am
@@ -2,16 +2,19 @@
xlator_LTLIBRARIES = stripe.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-stripe_la_LDFLAGS = -module -avoidversion
+stripe_la_LDFLAGS = -module -avoid-version
+
+stripe_la_SOURCES = stripe.c stripe-helpers.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-stripe_la_SOURCES = stripe.c $(top_builddir)/xlators/lib/src/libxlator.c
stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = stripe.h stripe-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
CLEANFILES =
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
new file mode 100644
index 000000000..a83abdc72
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe-helpers.c
@@ -0,0 +1,675 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+
+#include "stripe.h"
+#include "byte-order.h"
+#include "mem-types.h"
+
+void
+stripe_local_wipe (stripe_local_t *local)
+{
+ if (!local)
+ goto out;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ if (local->xattr)
+ dict_unref (local->xattr);
+
+ if (local->xdata)
+ dict_unref (local->xdata);
+
+out:
+ return;
+}
+
+
+
+int
+stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int64_t *ptr = 0, *size = NULL;
+ int32_t ret = -1;
+
+ dst = data;
+
+ if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
+ ret = dict_get_bin (dst, key, (void **)&size);
+ if (ret < 0) {
+ size = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (size == NULL) {
+ gf_log ("stripe", GF_LOG_WARNING,
+ "memory allocation failed");
+ goto out;
+ }
+ ret = dict_set_bin (dst, key, size, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log ("stripe", GF_LOG_WARNING,
+ "stripe aggregate dict set failed");
+ GF_FREE (size);
+ goto out;
+ }
+ }
+
+ ptr = data_to_bin (value);
+ if (ptr == NULL) {
+ gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
+ goto out;
+ }
+
+ *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+ } else if (strcmp (key, GF_CONTENT_KEY)) {
+ /* No need to aggregate 'CONTENT' data */
+ ret = dict_set (dst, key, value);
+ if (ret)
+ gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
+ }
+
+out:
+ return 0;
+}
+
+
+void
+stripe_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach (src, stripe_aggregate, dst);
+out:
+ return;
+}
+
+
+int32_t
+stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
+{
+ int32_t i = 0;
+ int32_t ret = -1;
+ int32_t len = 0;
+ char *sbuf = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (!buffer || !local || !local->xattr_list)
+ goto out;
+
+ sbuf = buffer;
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+ len = xattr->xattr_len;
+
+ if (len && xattr && xattr->xattr_value) {
+ memcpy (buffer, xattr->xattr_value, len);
+ buffer += len;
+ *buffer++ = ' ';
+ }
+ }
+
+ *--buffer = '\0';
+ if (total)
+ *total = buffer - sbuf;
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int32_t
+stripe_free_xattr_str (stripe_local_t *local)
+{
+ int32_t i = 0;
+ int32_t ret = -1;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (!local || !local->xattr_list)
+ goto out;
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+
+ if (xattr && xattr->xattr_value)
+ GF_FREE (xattr->xattr_value);
+ }
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz)
+{
+ int32_t ret = -1, i = 0, len = 0;
+ dict_t *tmp1 = NULL, *tmp2 = NULL;
+ char *buf = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (xattr_serz == NULL) {
+ goto out;
+ }
+
+ tmp2 = dict_new ();
+
+ if (tmp2 == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+ len = xattr->xattr_len;
+
+ if (len && xattr && xattr->xattr_value) {
+ ret = dict_reset (tmp2);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_reset failed (%s)",
+ strerror (-ret));
+ }
+
+ ret = dict_unserialize (xattr->xattr_value,
+ xattr->xattr_len,
+ &tmp2);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_unserialize failed (%s)",
+ strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ tmp1 = dict_copy (tmp2, tmp1);
+ if (tmp1 == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_copy failed (%s)",
+ strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ len = dict_serialized_length (tmp1);
+ if (len > 0) {
+ buf = GF_CALLOC (1, len, gf_common_mt_dict_t);
+ if (buf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_serialize (tmp1, buf);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialize failed (%s)", strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ *xattr_serz = buf;
+ }
+
+ ret = 0;
+out:
+ if (tmp1 != NULL) {
+ dict_unref (tmp1);
+ }
+
+ if (tmp2 != NULL) {
+ dict_unref (tmp2);
+ }
+
+ return ret;
+}
+
+
+int32_t
+stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz)
+{
+ int ret = -1;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+ char stripe_size_str[20] = {0,};
+ char *pathinfo_serz = NULL;
+
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+ goto out;
+ }
+
+ (void) snprintf (stripe_size_str, 20, "%ld",
+ (local->fctx) ? local->fctx->stripe_size : 0);
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
+ + strlen (stripe_size_str) + 7;
+ local->xattr_total_len += (padding + 2);
+
+ pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
+ gf_common_mt_char);
+ if (!pathinfo_serz)
+ goto out;
+
+ /* xlator info */
+ (void) sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ",
+ this->name, stripe_size_str);
+
+ ret = stripe_xattr_aggregate (pathinfo_serz + padding, local, &tlen);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot aggregate pathinfo list");
+ goto out;
+ }
+
+ *(pathinfo_serz + padding + tlen) = ')';
+ *(pathinfo_serz + padding + tlen + 1) = '\0';
+
+ *xattr_serz = pathinfo_serz;
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+/**
+ * stripe_get_matching_bs - Get the matching block size for the given path.
+ */
+int32_t
+stripe_get_matching_bs (const char *path, stripe_private_t *priv)
+{
+ struct stripe_options *trav = NULL;
+ uint64_t block_size = 0;
+
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+ GF_VALIDATE_OR_GOTO ("stripe", path, out);
+
+ LOCK (&priv->lock);
+ {
+ block_size = priv->block_size;
+ trav = priv->pattern;
+ while (trav) {
+ if (!fnmatch (trav->path_pattern, path, FNM_NOESCAPE)) {
+ block_size = trav->block_size;
+ break;
+ }
+ trav = trav->next;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ return block_size;
+}
+
+int32_t
+stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
+ dict_t *dict)
+{
+ char key[256] = {0,};
+ data_t *data = NULL;
+ int32_t index = 0;
+ stripe_private_t *priv = NULL;
+
+ priv = this->private;
+
+
+ if (!local->fctx) {
+ local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
+ gf_stripe_mt_stripe_fd_ctx_t);
+ if (!local->fctx) {
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
+ local->fctx->static_array = 0;
+ }
+ /* Stripe block size */
+ sprintf (key, "trusted.%s.stripe-size", this->name);
+ data = dict_get (dict, key);
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-size");
+ goto out;
+ } else {
+ if (!local->fctx->stripe_size) {
+ local->fctx->stripe_size =
+ data_to_int64 (data);
+ }
+
+ if (local->fctx->stripe_size != data_to_int64 (data)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "stripe-size mismatch in blocks");
+ local->xattr_self_heal_needed = 1;
+ }
+ }
+
+ /* Stripe count */
+ sprintf (key, "trusted.%s.stripe-count", this->name);
+ data = dict_get (dict, key);
+
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-count");
+ goto out;
+ }
+ if (!local->fctx->xl_array) {
+ local->fctx->stripe_count = data_to_int32 (data);
+ if (!local->fctx->stripe_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-count xattr");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ local->fctx->xl_array = GF_CALLOC (local->fctx->stripe_count,
+ sizeof (xlator_t *),
+ gf_stripe_mt_xlator_t);
+
+ if (!local->fctx->xl_array) {
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+ }
+ if (local->fctx->stripe_count != data_to_int32 (data)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-count xattr (%d != %d)",
+ local->fctx->stripe_count, data_to_int32 (data));
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ /* index */
+ sprintf (key, "trusted.%s.stripe-index", this->name);
+ data = dict_get (dict, key);
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-index");
+ goto out;
+ }
+ index = data_to_int32 (data);
+ if (index > priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-index xattr (%d)", index);
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+ if (local->fctx->xl_array) {
+ if (!local->fctx->xl_array[index])
+ local->fctx->xl_array[index] = prev->this;
+ }
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ data = dict_get(dict, key);
+ if (!data) {
+ /*
+ * The file was probably created prior to coalesce support.
+ * Assume non-coalesce mode for this file to maintain backwards
+ * compatibility.
+ */
+ gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce "
+ "attr, assume non-coalesce mode");
+ local->fctx->stripe_coalesce = 0;
+ } else {
+ local->fctx->stripe_coalesce = data_to_int32(data);
+ }
+
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
+ uint32_t stripe_count, uint32_t stripe_index,
+ uint32_t stripe_coalesce)
+{
+ char key[256] = {0,};
+ int32_t ret = -1;
+
+ sprintf (key, "trusted.%s.stripe-size", this->name);
+ ret = dict_set_int64 (dict, key, stripe_size);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf (key, "trusted.%s.stripe-count", this->name);
+ ret = dict_set_int32 (dict, key, stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf (key, "trusted.%s.stripe-index", this->name);
+ ret = dict_set_int32 (dict, key, stripe_index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ ret = dict_set_int32(dict, key, stripe_coalesce);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req_dict", key);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+
+static int
+set_default_block_size (stripe_private_t *priv, char *num)
+{
+
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO ("stripe", THIS, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, num, out);
+
+
+ if (gf_string2bytesize (num, &priv->block_size) != 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+
+}
+
+
+int
+set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *num = NULL;
+ struct stripe_options *temp_stripeopt = NULL;
+ struct stripe_options *stripe_opt = NULL;
+
+ if (!this || !priv || !data)
+ goto out;
+
+ /* Get the pattern for striping.
+ "option block-size *avi:10MB" etc */
+ stripe_str = strtok_r (data, ",", &tmp_str);
+ while (stripe_str) {
+ dup_str = gf_strdup (stripe_str);
+ stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options),
+ gf_stripe_mt_stripe_options);
+ if (!stripe_opt) {
+ goto out;
+ }
+
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!num) {
+ num = pattern;
+ pattern = "*";
+ ret = set_default_block_size (priv, num);
+ if (ret)
+ goto out;
+ }
+ if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+
+ if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
+ "%s. Should be atleast %llu bytes", num,
+ STRIPE_MIN_BLOCK_SIZE);
+ goto out;
+ }
+ if (stripe_opt->block_size % 512) {
+ gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
+ " be a multiple of 512 bytes", num);
+ goto out;
+ }
+
+ memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "block-size : pattern %s : size %"PRId64,
+ stripe_opt->path_pattern, stripe_opt->block_size);
+
+ if (priv->pattern)
+ temp_stripeopt = NULL;
+ else
+ temp_stripeopt = priv->pattern;
+
+ stripe_opt->next = temp_stripeopt;
+
+ priv->pattern = stripe_opt;
+ stripe_opt = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ stripe_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+
+ GF_FREE (dup_str);
+
+ GF_FREE (stripe_opt);
+
+ return ret;
+}
+
+int32_t
+stripe_iatt_merge (struct iatt *from, struct iatt *to)
+{
+ if (to->ia_size < from->ia_size)
+ to->ia_size = from->ia_size;
+ if (to->ia_mtime < from->ia_mtime)
+ to->ia_mtime = from->ia_mtime;
+ if (to->ia_ctime < from->ia_ctime)
+ to->ia_ctime = from->ia_ctime;
+ if (to->ia_atime < from->ia_atime)
+ to->ia_atime = from->ia_atime;
+ return 0;
+}
+
+off_t
+coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
+{
+ size_t line_size = 0;
+ uint64_t stripe_num = 0;
+ off_t coalesced_offset = 0;
+
+ line_size = stripe_size * stripe_count;
+ stripe_num = offset / line_size;
+
+ coalesced_offset = (stripe_num * stripe_size) +
+ (offset % stripe_size);
+
+ return coalesced_offset;
+}
+
+off_t
+uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index)
+{
+ uint64_t nr_full_stripe_chunks = 0, mod = 0;
+
+ if (!size)
+ return size;
+
+ /*
+ * Estimate the number of fully written stripes from the
+ * local file size. Each stripe_size chunk corresponds to
+ * a stripe.
+ */
+ nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
+ mod = size % stripe_size;
+
+ if (!mod) {
+ /*
+ * There is no remainder, thus we could have overestimated
+ * the size of the file in terms of chunks. Trim the number
+ * of chunks by the following stripe members and leave it
+ * up to those nodes to respond with a larger size (if
+ * necessary).
+ */
+ nr_full_stripe_chunks -= stripe_count -
+ (stripe_index + 1);
+ size = nr_full_stripe_chunks * stripe_size;
+ } else {
+ /*
+ * There is a remainder and thus we own the last chunk of the
+ * file. Add the preceding stripe members of the final stripe
+ * along with the remainder to calculate the exact size.
+ */
+ nr_full_stripe_chunks += stripe_index;
+ size = nr_full_stripe_chunks * stripe_size + mod;
+ }
+
+ return size;
+}
+
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
index 29c95c257..e9ac9cf46 100644
--- a/xlators/cluster/stripe/src/stripe-mem-types.h
+++ b/xlators/cluster/stripe/src/stripe-mem-types.h
@@ -1,21 +1,11 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,12 +15,12 @@
#include "mem-types.h"
enum gf_stripe_mem_types_ {
- gf_stripe_mt_stripe_local_t = gf_common_mt_end + 1,
- gf_stripe_mt_iovec,
- gf_stripe_mt_readv_replies,
+ gf_stripe_mt_iovec = gf_common_mt_end + 1,
+ gf_stripe_mt_stripe_replies,
gf_stripe_mt_stripe_fd_ctx_t,
gf_stripe_mt_char,
gf_stripe_mt_int8_t,
+ gf_stripe_mt_int32_t,
gf_stripe_mt_xlator_t,
gf_stripe_mt_stripe_private_t,
gf_stripe_mt_stripe_options,
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index 01064cd51..69b510e23 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/**
@@ -32,6 +23,7 @@
* very much necessary, or else, use it in combination with AFR, to have a
* backup copy.
*/
+#include <fnmatch.h>
#include "stripe.h"
#include "libxlator.h"
@@ -40,73 +32,10 @@
struct volume_options options[];
-void
-stripe_local_wipe (stripe_local_t *local)
-{
- if (!local)
- goto out;
-
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->inode)
- inode_unref (local->inode);
-
- if (local->xattr)
- dict_unref (local->xattr);
-
- if (local->dict)
- dict_unref (local->dict);
-
-out:
- return;
-}
-
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs (const char *path, struct stripe_options *opts,
- uint64_t default_bs)
-{
- struct stripe_options *trav = NULL;
- char *pathname = NULL;
- uint64_t block_size = 0;
-
- block_size = default_bs;
-
- if (!path || !opts)
- goto out;
-
- /* FIXME: is a strdup really necessary? */
- pathname = gf_strdup (path);
- if (!pathname)
- goto out;
-
- trav = opts;
- while (trav) {
- if (!fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
-
- GF_FREE (pathname);
-
-out:
- return block_size;
-}
-
-
-
int32_t
stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
int callcnt = -1;
stripe_local_t *local = NULL;
@@ -135,7 +64,7 @@ int32_t
stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -150,7 +79,7 @@ stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, stripe_sh_chown_cbk, prev->this,
prev->this->fops->setattr, &local->loc,
- &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID));
+ &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
out:
return 0;
@@ -164,7 +93,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
stripe_private_t *priv = NULL;
- dict_t *dict = NULL;
+ dict_t *xdata = NULL;
int ret = 0;
if (!local || !this || !frame) {
@@ -182,8 +111,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
if (!rframe) {
goto out;
}
- rlocal = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
goto out;
}
@@ -192,11 +120,11 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
loc_copy (&rlocal->loc, &local->loc);
memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt));
- dict = dict_new ();
- if (!dict)
+ xdata = dict_new ();
+ if (!xdata)
goto out;
- ret = dict_set_static_bin (dict, "gfid-req", local->stbuf.ia_gfid, 16);
+ ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16);
if (ret)
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to set gfid-req", local->loc.path);
@@ -207,103 +135,42 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
trav->xlator, trav->xlator->fops->mknod,
&local->loc,
st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type), 0,
- dict);
+ local->stbuf.ia_type),
+ 0, 0, xdata);
}
if (IA_ISDIR (local->stbuf.ia_type)) {
STACK_WIND (rframe, stripe_sh_make_entry_cbk,
trav->xlator, trav->xlator->fops->mkdir,
- &local->loc, st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type),
- dict);
+ &local->loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, xdata);
}
trav = trav->next;
}
- if (dict)
- dict_unref (dict);
+ if (xdata)
+ dict_unref (xdata);
return 0;
out:
if (rframe)
STRIPE_STACK_DESTROY (rframe);
- if (dict)
- dict_unref (dict);
+ if (xdata)
+ dict_unref (xdata);
return 0;
}
-void
-stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
-
- dst = data;
-
- if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin (dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC (1, sizeof (int64_t),
- gf_common_mt_char);
- if (size == NULL) {
- gf_log ("stripe", GF_LOG_WARNING,
- "memory allocation failed");
- goto out;
- }
- ret = dict_set_bin (dst, key, size, sizeof (int64_t));
- if (ret < 0) {
- gf_log ("stripe", GF_LOG_WARNING,
- "stripe aggregate dict set failed");
- GF_FREE (size);
- goto out;
- }
- }
-
- ptr = data_to_bin (value);
- if (ptr == NULL) {
- gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
- goto out;
- }
-
- *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
- } else if (strcmp (key, GF_CONTENT_KEY)) {
- /* No need to aggregate 'CONTENT' data */
- ret = dict_set (dst, key, value);
- if (ret)
- gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
- }
-
-out:
- return;
-}
-
-
-void
-stripe_aggregate_xattr (dict_t *dst, dict_t *src)
-{
- if ((dst == NULL) || (src == NULL)) {
- goto out;
- }
-
- dict_foreach (src, stripe_aggregate, dst);
-out:
- return;
-}
-
-
int32_t
stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
- uint64_t stripe_size = 0;
- char size_xattr[256] = {0,};
int ret = 0;
if (!this || !frame || !frame->local || !cookie) {
@@ -335,43 +202,42 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG (buf->ia_type)) {
+ ret = stripe_ctx_handle (this, prev, local,
+ xdata);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error getting fctx info from"
+ " dict");
+ }
if (FIRST_CHILD(this) == prev->this) {
local->stbuf = *buf;
local->postparent = *postparent;
local->inode = inode_ref (inode);
- local->dict = dict_ref (dict);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
if (local->xattr) {
- stripe_aggregate_xattr (local->dict,
+ stripe_aggregate_xattr (local->xdata,
local->xattr);
dict_unref (local->xattr);
local->xattr = NULL;
}
-
- (void) snprintf (size_xattr, 256,
- "trusted.%s.stripe-size",
- this->name);
- ret = dict_get_uint64 (dict, size_xattr,
- &stripe_size);
- if (!ret) {
- ret = inode_ctx_put (inode, this,
- stripe_size);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "Error setting ctx");
- }
}
- if (!local->dict && !local->xattr) {
- local->xattr = dict_ref (dict);
- } else if (local->dict) {
- stripe_aggregate_xattr (local->dict, dict);
+
+ if (!local->xdata && !local->xattr) {
+ local->xattr = dict_ref (xdata);
+ } else if (local->xdata) {
+ stripe_aggregate_xattr (local->xdata, xdata);
} else if (local->xattr) {
- stripe_aggregate_xattr (local->xattr, dict);
+ stripe_aggregate_xattr (local->xattr, xdata);
}
local->stbuf_blocks += buf->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->postparent_size < postparent->ia_size)
@@ -403,11 +269,13 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->postparent.ia_blocks = local->postparent_blocks;
local->postparent.ia_size = local->postparent_size;
+ inode_ctx_put (local->inode, this,
+ (uint64_t) (long)local->fctx);
}
STRIPE_STACK_UNWIND (lookup, frame, local->op_ret,
local->op_errno, local->inode,
- &local->stbuf, local->dict,
+ &local->stbuf, local->xdata,
&local->postparent);
}
out:
@@ -416,7 +284,7 @@ out:
int32_t
stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -424,7 +292,7 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
int32_t op_errno = EINVAL;
int64_t filesize = 0;
int ret = 0;
- char xtra_xattr[256] = {0,};
+ uint64_t tmpctx = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -436,8 +304,7 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -446,20 +313,37 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
frame->local = local;
loc_copy (&local->loc, loc);
- if (xattr_req && dict_get (xattr_req, GF_CONTENT_KEY)) {
- ret = dict_get_int64 (xattr_req, GF_CONTENT_KEY, &filesize);
+ inode_ctx_get (local->inode, this, &tmpctx);
+ if (tmpctx)
+ local->fctx = (stripe_fd_ctx_t*) (long)tmpctx;
+
+ /* quick-read friendly changes */
+ if (xdata && dict_get (xdata, GF_CONTENT_KEY)) {
+ ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize);
if (!ret && (filesize > priv->block_size))
- dict_del (xattr_req, GF_CONTENT_KEY);
+ dict_del (xdata, GF_CONTENT_KEY);
}
- /* get stripe-size xattr on lookup for pathinfo string */
- if (xattr_req) {
- (void) snprintf (xtra_xattr, 256, "trusted.%s.stripe-size",
- this->name);
- ret = dict_set_uint64 (xattr_req, xtra_xattr, (uint64_t) 0);
+ /* get stripe-size xattr on lookup. This would be required for
+ * open/read/write/pathinfo calls. Hence we send down the request
+ * even when type == IA_INVAL */
+
+ /*
+ * We aren't guaranteed to have xdata here. We need the format info for
+ * the file, so allocate xdata if necessary.
+ */
+ if (!xdata)
+ xdata = dict_new();
+ else
+ xdata = dict_ref(xdata);
+
+ if (xdata && (IA_ISREG (loc->inode->ia_type) ||
+ (loc->inode->ia_type == IA_INVAL))) {
+ ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set stripe-"
- "size key in xattr request dict");
+ gf_log (this->name , GF_LOG_ERROR, "Failed to build"
+ " xattr request for %s", loc->path);
+
}
/* Everytime in stripe lookup, all child nodes
@@ -467,11 +351,12 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup,
- loc, xattr_req);
+ trav->xlator->fops->lookup, loc, xdata);
trav = trav->next;
}
+ dict_unref(xdata);
+
return 0;
err:
STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
@@ -481,7 +366,7 @@ err:
int32_t
stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -516,6 +401,9 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -532,18 +420,19 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STRIPE_STACK_UNWIND (stat, frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ local->op_errno, &local->stbuf, NULL);
}
out:
return 0;
}
int32_t
-stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -561,8 +450,7 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -571,23 +459,30 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_stat_cbk, trav->xlator,
- trav->xlator->fops->stat, loc);
+ trav->xlator->fops->stat, loc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata)
{
stripe_local_t *local = NULL;
int32_t callcnt = 0;
@@ -625,14 +520,14 @@ stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
STRIPE_STACK_UNWIND (statfs, frame, local->op_ret,
- local->op_errno, &local->statvfs_buf);
+ local->op_errno, &local->statvfs_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -647,8 +542,7 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
priv = this->private;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -660,13 +554,13 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc);
+ trav->xlator->fops->statfs, loc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -675,7 +569,7 @@ err:
int32_t
stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -713,6 +607,9 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -735,19 +632,21 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (truncate, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -756,7 +655,6 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
@@ -764,8 +662,7 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -774,15 +671,55 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->truncate, loc, offset);
- trav = trav->next;
- }
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no xlator at index %d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ /*
+ * The node that owns EOF is truncated to the exact
+ * coalesced offset. Nodes prior to this index should
+ * be rounded up to the size of the complete stripe,
+ * while nodes after this index should be rounded down
+ * to the size of the previous stripe.
+ */
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->truncate, loc, dest_offset,
+ NULL);
+ }
return 0;
err:
- STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -790,7 +727,7 @@ err:
int32_t
stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -829,6 +766,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += preop->ia_blocks;
local->postbuf_blocks += postop->ia_blocks;
+ correct_file_size(preop, local->fctx, prev);
+ correct_file_size(postop, local->fctx, prev);
+
if (local->prebuf_size < preop->ia_size)
local->prebuf_size = preop->ia_size;
if (local->postbuf_size < postop->ia_size)
@@ -850,7 +790,7 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (setattr, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
@@ -859,11 +799,12 @@ out:
int32_t
stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -881,8 +822,7 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -894,28 +834,35 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->call_count = 1;
STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
return 0;
}
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk,
trav->xlator, trav->xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -931,8 +878,7 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -943,13 +889,13 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->fsetattr, fd, stbuf, valid);
+ trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -957,7 +903,8 @@ int32_t
stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -994,6 +941,8 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->pre_buf.ia_blocks += prenewparent->ia_blocks;
local->post_buf.ia_blocks += postnewparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf.ia_size < buf->ia_size)
local->stbuf.ia_size = buf->ia_size;
@@ -1019,7 +968,7 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preparent,
&local->postparent, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
@@ -1029,7 +978,8 @@ int32_t
stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -1060,24 +1010,25 @@ stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
while (trav) {
STACK_WIND (frame, stripe_stack_rename_cbk,
trav->xlator, trav->xlator->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
trav = trav->next;
}
return 0;
unwind:
STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
+ postoldparent, prenewparent, postnewparent, NULL);
return 0;
}
int32_t
stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -1097,8 +1048,7 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1109,21 +1059,28 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
local->call_count = priv->child_count;
+ if (IA_ISREG(oldloc->inode->ia_type)) {
+ inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
frame->local = local;
STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc);
+ trav->xlator->fops->rename, oldloc, newloc, NULL);
return 0;
err:
STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int32_t
stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1148,10 +1105,10 @@ stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent_blocks += postparent->ia_blocks;
STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, xdata);
return 0;
out:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1162,7 +1119,7 @@ out:
int32_t
stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1198,17 +1155,19 @@ stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->unlink, &local->loc);
+ FIRST_CHILD (this)->fops->unlink, &local->loc,
+ local->xflag, local->xdata);
}
return 0;
out:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1236,14 +1195,18 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+
frame->local = local;
local->call_count = priv->child_count;
trav = trav->next; /* Skip the first child */
@@ -1251,13 +1214,13 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
while (trav) {
STACK_WIND (frame, stripe_unlink_cbk,
trav->xlator, trav->xlator->fops->unlink,
- loc);
+ loc, xflag, xdata);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1265,8 +1228,7 @@ err:
int32_t
stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent)
-
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
@@ -1293,10 +1255,10 @@ stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent_blocks += postparent->ia_blocks;
STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1304,7 +1266,7 @@ err:
int32_t
stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1336,16 +1298,16 @@ stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->rmdir, &local->loc,
- local->flags);
+ local->flags, NULL);
}
return 0;
out:
- STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1368,8 +1330,7 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1383,13 +1344,13 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
while (trav) {
STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc, flags);
+ trav->xlator->fops->rmdir, loc, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1398,7 +1359,7 @@ int32_t
stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1419,7 +1380,7 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
if (!callcnt) {
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1431,7 +1392,7 @@ out:
int32_t
stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1470,7 +1431,7 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
@@ -1478,7 +1439,7 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1488,7 +1449,7 @@ int32_t
stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1526,10 +1487,16 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (uuid_is_null (local->ia_gfid))
uuid_copy (local->ia_gfid, buf->ia_gfid);
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict");
+
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -1554,7 +1521,7 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
@@ -1568,13 +1535,13 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent.ia_size = local->postparent_size;
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- }
+ inode_ctx_put (local->inode, this,
+ (uint64_t)(long) local->fctx);
- /* Create itself has failed.. so return
- without setxattring */
+ }
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1585,16 +1552,13 @@ int32_t
stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
int i = 1;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
int ret = 0;
int need_unref = 0;
@@ -1634,10 +1598,6 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- sprintf (size_key, "trusted.%s.stripe-size", this->name);
- sprintf (count_key, "trusted.%s.stripe-count", this->name);
- sprintf (index_key, "trusted.%s.stripe-index", this->name);
-
trav = trav->next;
while (trav) {
if (priv->xattr_supported) {
@@ -1650,26 +1610,21 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dict_copy (local->xattr, dict);
- ret = dict_set_int64 (dict, size_key, local->stripe_size);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count, i,
+ priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", local->loc.path);
- ret = dict_set_int32 (dict, count_key, priv->child_count);
+ "Failed to build xattr request");
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed", local->loc.path);
- ret = dict_set_int32 (dict, index_key, i);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed", local->loc.path);
} else {
dict = local->xattr;
}
STACK_WIND (frame, stripe_mknod_ifreg_cbk,
trav->xlator, trav->xlator->fops->mknod,
- &local->loc, local->mode, local->rdev, dict);
+ &local->loc, local->mode, local->rdev, 0, dict);
trav = trav->next;
i++;
@@ -1681,7 +1636,7 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
- STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1690,25 +1645,22 @@ int32_t
stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *params)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
int32_t op_errno = EINVAL;
int32_t i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
int ret = 0;
int need_unref = 0;
@@ -1738,37 +1690,26 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
- local->xattr = dict_copy_with_ref (params, NULL);
+ local->xattr = dict_copy_with_ref (xdata, NULL);
local->mode = mode;
+ local->umask = umask;
local->rdev = rdev;
/* Everytime in stripe lookup, all child nodes should
be looked up */
local->call_count = priv->child_count;
- /* Send a setxattr request to nodes where the
- files are created */
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
-
if (priv->xattr_supported) {
dict = dict_new ();
if (!dict) {
@@ -1777,29 +1718,22 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
need_unref = 1;
- dict_copy (params, dict);
+ dict_copy (xdata, dict);
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", loc->path);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed", loc->path);
- ret = dict_set_int32 (dict, index_key, i);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed", loc->path);
+ "failed to build xattr request");
} else {
- dict = params;
+ dict = xdata;
}
STACK_WIND (frame, stripe_mknod_first_ifreg_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
- loc, mode, rdev, dict);
+ loc, mode, rdev, umask, dict);
if (dict && need_unref)
dict_unref (dict);
@@ -1808,11 +1742,11 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
STACK_WIND (frame, stripe_single_mknod_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, params);
+ loc, mode, rdev, umask, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1821,7 +1755,7 @@ int32_t
stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1878,7 +1812,7 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
}
out:
return 0;
@@ -1887,9 +1821,9 @@ out:
int32_t
stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1920,7 +1854,7 @@ stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf = *buf;
local->postparent = *postparent;
local->preparent = *preparent;
-
+
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
@@ -1932,13 +1866,13 @@ stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
while (trav) {
STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator,
trav->xlator->fops->mkdir, &local->loc, local->mode,
- local->dict);
+ local->umask, local->xdata);
trav = trav->next;
}
return 0;
out:
STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
return 0;
@@ -1947,7 +1881,7 @@ out:
int
stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
@@ -1969,26 +1903,27 @@ stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->call_count = priv->child_count;
- local->dict = dict_ref (params);
- local->mode = mode;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ local->mode = mode;
+ local->umask = umask;
loc_copy (&local->loc, loc);
frame->local = local;
/* Everytime in stripe lookup, all child nodes should be looked up */
STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, loc, mode, params);
+ trav->xlator->fops->mkdir, loc, mode, umask, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1997,11 +1932,12 @@ int32_t
stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -2028,6 +1964,16 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG(inode->ia_type)) {
+ inode_ctx_get(inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get stripe context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+ }
+
if (FIRST_CHILD(this) == prev->this) {
local->inode = inode_ref (inode);
local->stbuf = *buf;
@@ -2038,6 +1984,8 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -2063,14 +2011,14 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (link, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
}
out:
return 0;
}
int32_t
-stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -2093,8 +2041,7 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2108,13 +2055,13 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
while (trav) {
STACK_WIND (frame, stripe_link_cbk,
trav->xlator, trav->xlator->fops->link,
- oldloc, newloc);
+ oldloc, newloc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -2122,7 +2069,7 @@ int32_t
stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2143,7 +2090,7 @@ stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
if (!callcnt) {
STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno,
local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -2154,12 +2101,11 @@ int32_t
stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
@@ -2185,12 +2131,21 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0) {
+ if (IA_ISREG(buf->ia_type)) {
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from "
+ "dict");
+ }
+
local->op_ret = op_ret;
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -2213,7 +2168,7 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_create_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
@@ -2228,29 +2183,19 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!fctx) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
-
- fctx->stripe_size = local->stripe_size;
- fctx->stripe_count = priv->child_count;
- fctx->static_array = 1;
- fctx->xl_array = priv->xl_array;
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)fctx);
+ stripe_copy_xl_array(local->fctx->xl_array,
+ priv->xl_array,
+ local->fctx->stripe_count);
+ inode_ctx_put(local->inode, this,
+ (uint64_t) local->fctx);
}
- unwind:
/* Create itself has failed.. so return
without setxattring */
STRIPE_STACK_UNWIND (create, frame, local->op_ret,
local->op_errno, local->fd,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
@@ -2263,7 +2208,7 @@ int32_t
stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2274,9 +2219,6 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
int32_t need_unref = 0;
int32_t ret = -1;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -2323,7 +2265,7 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->call_count = 1;
STACK_WIND (frame, stripe_create_fail_unlink_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
return 0;
}
@@ -2338,9 +2280,6 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Send a setxattr request to nodes where the
files are created */
- sprintf (size_key, "trusted.%s.stripe-size", this->name);
- sprintf (count_key, "trusted.%s.stripe-count", this->name);
- sprintf (index_key, "trusted.%s.stripe-index", this->name);
trav = trav->next;
while (trav) {
if (priv->xattr_supported) {
@@ -2353,27 +2292,20 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dict_copy (local->xattr, dict);
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", loc->path);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed", loc->path);
- ret = dict_set_int32 (dict, index_key, i);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed", loc->path);
+ "failed to build xattr request");
} else {
dict = local->xattr;
}
-
+
STACK_WIND (frame, stripe_create_cbk, trav->xlator,
trav->xlator->fops->create, &local->loc,
- local->flags, local->mode, local->fd,
+ local->flags, local->mode, local->umask, local->fd,
dict);
trav = trav->next;
if (need_unref && dict)
@@ -2395,7 +2327,7 @@ out:
*/
int32_t
stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
@@ -2403,9 +2335,6 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
int ret = 0;
int need_unref = 0;
int i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
VALIDATE_OR_GOTO (frame, err);
@@ -2427,31 +2356,27 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
local->fd = fd_ref (fd);
local->flags = flags;
local->mode = mode;
- local->xattr = dict_copy_with_ref (params, NULL);
+ local->umask = umask;
+ if (xdata)
+ local->xattr = dict_ref (xdata);
local->call_count = priv->child_count;
/* Send a setxattr request to nodes where the
files are created */
- sprintf (size_key, "trusted.%s.stripe-size", this->name);
- sprintf (count_key, "trusted.%s.stripe-count", this->name);
- sprintf (index_key, "trusted.%s.stripe-index", this->name);
if (priv->xattr_supported) {
dict = dict_new ();
@@ -2461,30 +2386,23 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
need_unref = 1;
- dict_copy (params, dict);
+ dict_copy (xdata, dict);
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", loc->path);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed", loc->path);
- ret = dict_set_int32 (dict, index_key, i);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed", loc->path);
+ "failed to build xattr request");
} else {
- dict = params;
+ dict = xdata;
}
STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->create, loc, flags, mode,
- fd, dict);
+ umask, fd, dict);
if (need_unref && dict)
dict_unref (dict);
@@ -2493,13 +2411,13 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
return 0;
err:
STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, xdata);
return 0;
}
int32_t
stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2537,224 +2455,25 @@ stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
- if (local->op_ret == -1) {
- if (local->fctx) {
- if (!local->fctx->static_array)
- GF_FREE (local->fctx->xl_array);
- GF_FREE (local->fctx);
- }
- } else {
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)local->fctx);
- }
-
STRIPE_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, xdata);
}
out:
return 0;
}
-int32_t
-stripe_open_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- char key[256] = {0,};
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = (call_frame_t *)cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- if (local->op_errno != EIO)
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- goto unlock;
- }
-
- if (!dict)
- goto unlock;
-
- if (!local->fctx) {
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf (key, "trusted.%s.stripe-size", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size =
- data_to_int64 (data);
- }
-
- if (local->fctx->stripe_size != data_to_int64 (data)) {
- gf_log (this->name, GF_LOG_WARNING,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
- /* Stripe count */
- sprintf (key, "trusted.%s.stripe-count", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32 (data);
- if (!local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
-
- local->fctx->xl_array =
- GF_CALLOC (local->fctx->stripe_count,
- sizeof (xlator_t *),
- gf_stripe_mt_xlator_t);
- if (!local->fctx->xl_array) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
- }
- if (local->fctx->stripe_count != data_to_int32 (data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr (%d != %d)",
- local->fctx->stripe_count, data_to_int32 (data));
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
-
- /* index */
- sprintf (key, "trusted.%s.stripe-index", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- index = data_to_int32 (data);
- if (index > priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-index xattr (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- if (local->fctx->xl_array) {
- if (local->fctx->xl_array[index]) {
- gf_log (this->name, GF_LOG_ERROR,
- "duplicate entry @ index (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- local->fctx->xl_array[index] = prev->this;
- }
- local->entry_count++;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* TODO: if self-heal flag is set, do it */
- if (local->xattr_self_heal_needed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: stripe info need to be healed",
- local->loc.path);
- }
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret)
- goto err;
-
- if (local->entry_count != local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "entry-count (%d) != stripe-count (%d)",
- local->entry_count, local->fctx->stripe_count);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
- if (!local->fctx->stripe_size) {
- gf_log (this->name, GF_LOG_ERROR, "stripe size not set");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
-
- local->call_count = local->fctx->stripe_count;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open, &local->loc,
- local->flags, local->fd, 0);
- trav = trav->next;
- }
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
-out:
- return 0;
-}
/**
* stripe_open -
*/
int32_t
stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
int32_t op_errno = 1;
- dict_t *dict = NULL;
- int ret = 0;
- char key[256] = {0,};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2771,8 +2490,7 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2788,73 +2506,25 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
/* Striped files */
local->flags = flags;
local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
-
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict)
- goto err;
-
- sprintf (key, "trusted.%s.stripe-size", this->name);
- ret = dict_set_int64 (dict, key, 8);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- sprintf (key, "trusted.%s.stripe-count", this->name);
- ret = dict_set_int32 (dict, key, 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- sprintf (key, "trusted.%s.stripe-index", this->name);
- ret = dict_set_int32 (dict, key, 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- while (trav) {
- STACK_WIND (frame, stripe_open_lookup_cbk,
- trav->xlator, trav->xlator->fops->lookup,
- loc, dict);
- trav = trav->next;
- }
- if (dict)
- dict_unref (dict);
-
- return 0;
- }
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fctx->static_array = 1;
- local->fctx->stripe_size = local->stripe_size;
- local->fctx->stripe_count = priv->child_count;
- local->fctx->xl_array = priv->xl_array;
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
while (trav) {
STACK_WIND (frame, stripe_open_cbk, trav->xlator,
trav->xlator->fops->open,
&local->loc, local->flags, local->fd,
- wbflags);
+ xdata);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2887,7 +2557,7 @@ stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
STRIPE_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
}
out:
return 0;
@@ -2895,7 +2565,7 @@ out:
int32_t
-stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -2917,8 +2587,7 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2929,19 +2598,19 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
while (trav) {
STACK_WIND (frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd);
+ trav->xlator->fops->opendir, loc, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2981,7 +2650,7 @@ stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
STRIPE_STACK_UNWIND (lk, frame, local->op_ret,
- local->op_errno, &local->lock);
+ local->op_errno, &local->lock, NULL);
}
out:
return 0;
@@ -2989,7 +2658,7 @@ out:
int32_t
stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -3005,8 +2674,7 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
priv = this->private;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3017,20 +2685,20 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
while (trav) {
STACK_WIND (frame, stripe_lk_cbk, trav->xlator,
- trav->xlator->fops->lk, fd, cmd, lock);
+ trav->xlator->fops->lk, fd, cmd, lock, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3067,14 +2735,14 @@ stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = -1;
STRIPE_STACK_UNWIND (flush, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
}
out:
return 0;
}
int32_t
-stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -3094,8 +2762,7 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto err;
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3106,13 +2773,13 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
while (trav) {
STACK_WIND (frame, stripe_flush_cbk, trav->xlator,
- trav->xlator->fops->flush, fd);
+ trav->xlator->fops->flush, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (flush, frame, -1, op_errno);
+ STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -3121,7 +2788,7 @@ err:
int32_t
stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3157,6 +2824,9 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -3179,18 +2849,19 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (fsync, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -3202,31 +2873,38 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->fctx = fctx;
+
local->op_ret = -1;
frame->local = local;
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_fsync_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags);
+ trav->xlator->fops->fsync, fd, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3261,6 +2939,9 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf = *buf;
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -3277,7 +2958,7 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STRIPE_STACK_UNWIND (fstat, frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ local->op_errno, &local->stbuf, NULL);
}
out:
@@ -3287,11 +2968,12 @@ out:
int32_t
stripe_fstat (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -3303,8 +2985,7 @@ stripe_fstat (call_frame_t *frame,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3313,26 +2994,35 @@ stripe_fstat (call_frame_t *frame,
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(fd->inode->ia_type)) {
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd);
+ trav->xlator->fops->fstat, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
-stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
+ int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -3340,11 +3030,9 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
VALIDATE_OR_GOTO (fd->inode, err);
priv = this->private;
- trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3353,22 +3041,60 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->ftruncate, fd, offset);
- trav = trav->next;
- }
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (!fctx->stripe_count) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe count");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR, "no xlator at index "
+ "%d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->ftruncate, fd, dest_offset,
+ NULL);
+ }
return 0;
err:
- STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3405,14 +3131,14 @@ stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = -1;
STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
}
out:
return 0;
}
int32_t
-stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -3428,8 +3154,7 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3440,20 +3165,20 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
while (trav) {
STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags);
+ trav->xlator->fops->fsyncdir, fd, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno);
+ STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
int32_t
stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t i = 0;
int32_t callcnt = 0;
@@ -3463,6 +3188,7 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt tmp_stbuf = {0,};
struct iobref *tmp_iobref = NULL;
struct iobuf *iobuf = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3470,13 +3196,16 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local = frame->local;
+ prev = cookie;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
- if (op_ret != -1)
+ if (op_ret != -1) {
+ correct_file_size(buf, local->fctx, prev);
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
+ }
}
UNLOCK (&frame->lock);
@@ -3505,7 +3234,8 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
vec[count].iov_len =
(local->replies[i].requested_size -
local->replies[i].op_ret);
- iobuf = iobuf_get (this->ctx->iobuf_pool);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ vec[count].iov_len);
if (!iobuf) {
gf_log (this->name, GF_LOG_ERROR,
"Out of memory.");
@@ -3514,9 +3244,11 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto done;
}
memset (iobuf->ptr, 0, vec[count].iov_len);
- iobref_add (local->iobref, iobuf);
vec[count].iov_base = iobuf->ptr;
+ iobref_add (local->iobref, iobuf);
+ iobuf_unref(iobuf);
+
op_ret += vec[count].iov_len;
count++;
}
@@ -3534,11 +3266,10 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
GF_FREE (local->replies);
tmp_iobref = local->iobref;
STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec,
- count, &tmp_stbuf, tmp_iobref);
+ count, &tmp_stbuf, tmp_iobref, NULL);
iobref_unref (tmp_iobref);
- if (vec)
- GF_FREE (vec);
+ GF_FREE (vec);
}
out:
return 0;
@@ -3551,7 +3282,7 @@ out:
int32_t
stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
int32_t index = 0;
int32_t callcnt = 0;
@@ -3562,8 +3293,10 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_local_t *local = NULL;
struct iovec *final_vec = NULL;
struct iatt tmp_stbuf = {0,};
+ struct iatt *tmp_stbuf_p = NULL; //need it for a warning
struct iobref *tmp_iobref = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3572,6 +3305,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
index = local->node_index;
+ prev = cookie;
mframe = local->orig_frame;
if (!mframe)
goto out;
@@ -3591,6 +3325,9 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
mlocal->replies[index].stbuf = *stbuf;
mlocal->replies[index].count = count;
mlocal->replies[index].vector = iov_dup (vector, count);
+
+ correct_file_size(stbuf, fctx, prev);
+
if (local->stbuf_size < stbuf->ia_size)
local->stbuf_size = stbuf->ia_size;
local->stbuf_blocks += stbuf->ia_blocks;
@@ -3659,13 +3396,13 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
GF_FREE (mlocal->replies);
tmp_iobref = mlocal->iobref;
/* work around for nfs truncated read. Bug 3774 */
- WIPE (&tmp_stbuf);
+ tmp_stbuf_p = &tmp_stbuf;
+ WIPE (tmp_stbuf_p);
STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec,
- final_count, &tmp_stbuf, tmp_iobref);
+ final_count, &tmp_stbuf, tmp_iobref, NULL);
iobref_unref (tmp_iobref);
- if (final_vec)
- GF_FREE (final_vec);
+ GF_FREE (final_vec);
}
goto out;
@@ -3677,7 +3414,7 @@ check_size:
STACK_WIND (mframe, stripe_readv_fstat_cbk,
(fctx->xl_array[index]),
(fctx->xl_array[index])->fops->fstat,
- mlocal->fd);
+ mlocal->fd, NULL);
}
out:
@@ -3689,7 +3426,7 @@ end:
int32_t
stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
int32_t op_errno = EINVAL;
int32_t idx = 0;
@@ -3702,6 +3439,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
uint64_t stripe_size = 0;
off_t rounded_start = 0;
off_t frame_offset = offset;
+ off_t dest_offset = 0;
stripe_local_t *local = NULL;
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
@@ -3712,7 +3450,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EBADFD;
goto err;
@@ -3720,6 +3458,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
if (!stripe_size) {
gf_log (this->name, GF_LOG_DEBUG,
"Wrong stripe size for the file");
@@ -3734,8 +3474,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rounded_end = roof (offset+size, stripe_size);
num_stripe = (rounded_end- rounded_start)/stripe_size;
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3743,8 +3482,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
frame->local = local;
/* This is where all the vectors should be copied. */
- local->replies = GF_CALLOC (num_stripe, sizeof (struct readv_replies),
- gf_stripe_mt_readv_replies);
+ local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
if (!local->replies) {
op_errno = ENOMEM;
goto err;
@@ -3759,8 +3498,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
for (index = off_index; index < (num_stripe + off_index); index++) {
rframe = copy_frame (frame);
- rlocal = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
op_errno = ENOMEM;
goto err;
@@ -3774,9 +3512,16 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rlocal->readv_size = frame_size;
rframe->local = rlocal;
idx = (index % fctx->stripe_count);
+
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(frame_offset,
+ stripe_size, fctx->stripe_count);
+ else
+ dest_offset = frame_offset;
+
STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->readv,
- fd, frame_size, frame_offset);
+ fd, frame_size, dest_offset, flags, xdata);
frame_offset += frame_size;
}
@@ -3786,7 +3531,7 @@ err:
if (rframe)
STRIPE_STACK_DESTROY (rframe);
- STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -3794,11 +3539,15 @@ err:
int32_t
stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+ struct stripe_replies *reply = NULL;
+ int32_t i = 0;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3807,39 +3556,82 @@ stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
LOCK(&frame->lock);
{
- callcnt = ++local->call_count;
+ callcnt = ++mlocal->call_count;
+
+ mlocal->replies[local->node_index].op_ret = op_ret;
+ mlocal->replies[local->node_index].op_errno = op_errno;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- local->op_ret = -1;
- }
if (op_ret >= 0) {
- local->op_ret += op_ret;
- local->post_buf = *postbuf;
- local->pre_buf = *prebuf;
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
}
}
UNLOCK (&frame->lock);
- if ((callcnt == local->wind_count) && local->unwind) {
- STRIPE_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, &local->pre_buf,
- &local->post_buf);
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ /*
+ * Only return the number of consecutively written bytes up until
+ * the first error. Only return an error if it occurs first.
+ *
+ * When a short write occurs, the application should retry at the
+ * appropriate offset, at which point we'll potentially pass back
+ * the error.
+ */
+ for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
+ i++, reply++) {
+ if (reply->op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "reply %d "
+ "returned error %s", i,
+ strerror(reply->op_errno));
+ if (!mlocal->op_ret) {
+ mlocal->op_ret = -1;
+ mlocal->op_errno = reply->op_errno;
+ }
+ break;
+ }
+
+ mlocal->op_ret += reply->op_ret;
+
+ if (reply->op_ret < reply->requested_size)
+ break;
+ }
+
+ GF_FREE(mlocal->replies);
+
+ STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
}
out:
+ STRIPE_STACK_DESTROY(frame);
return 0;
}
int32_t
stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
struct iovec *tmp_vec = NULL;
stripe_local_t *local = NULL;
@@ -3853,13 +3645,19 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t fill_size = 0;
uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ off_t rounded_start = 0;
+ off_t rounded_end = 0;
+ int32_t total_chunks = 0;
+ call_frame_t *wframe = NULL;
+ stripe_local_t *wlocal = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EINVAL;
goto err;
@@ -3867,22 +3665,51 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
/* File has to be stripped across the child nodes */
for (idx = 0; idx< count; idx ++) {
total_size += vector[idx].iov_len;
}
remaining_size = total_size;
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ rounded_start = floor(offset, stripe_size);
+ rounded_end = roof(offset + total_size, stripe_size);
+ total_chunks = (rounded_end - rounded_start) / stripe_size;
+ local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
+ if (!local->replies) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ total_chunks = 0;
while (1) {
+ wframe = copy_frame(frame);
+ wlocal = mem_get0(this->local_pool);
+ if (!wlocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ wlocal->orig_frame = frame;
+ wframe->local = wlocal;
+
/* Send striped chunk of the vector to child
nodes appropriately. */
idx = (((offset + offset_offset) /
@@ -3910,51 +3737,587 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (remaining_size == 0)
local->unwind = 1;
- STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx],
+ /*
+ * Store off the request index (with respect to the chunk of the
+ * initial offset) and the size of the request. This is required
+ * in the callback to calculate an appropriate return value in
+ * the event of a write failure in one or more requests.
+ */
+ wlocal->node_index = total_chunks;
+ local->replies[total_chunks].requested_size = fill_size;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
- tmp_count, offset + offset_offset, iobref);
+ tmp_count, dest_offset, flags, iobref,
+ xdata);
+
GF_FREE (tmp_vec);
offset_offset += fill_size;
+ total_chunks++;
if (remaining_size == 0)
break;
}
return 0;
err:
- STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ if (wframe)
+ STRIPE_STACK_DESTROY(wframe);
+
+ STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_release (xlator_t *this, fd_t *fd)
+stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send fallocate request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single fallocate per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->fallocate, fd, mode,
+ dest_offset, fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+ VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_del (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
+ op_errno = EINVAL;
goto err;
}
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send discard request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single discard per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->discard, fd, dest_offset,
+ fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
- if (!fctx->static_array)
- GF_FREE (fctx->xl_array);
+ STRIPE_VALIDATE_FCTX (fctx, err);
- GF_FREE (fctx);
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size,
+ fctx->stripe_count);
+
+ STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->zerofill, fd,
+ dest_offset, fill_size, xdata);
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_release (xlator_t *this, fd_t *fd)
+{
return 0;
}
int
stripe_forget (xlator_t *this, inode_t *inode)
{
- (void) inode_ctx_del (inode, this, 0);
+ uint64_t tmp_fctx = 0;
+ stripe_fd_ctx_t *fctx = NULL;
+
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (inode, err);
+
+ (void) inode_ctx_del (inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ goto err;
+ }
+
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+
+ if (!fctx->static_array)
+ GF_FREE (fctx->xl_array);
+
+ GF_FREE (fctx);
+err:
return 0;
}
@@ -3964,6 +4327,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
stripe_private_t *priv = NULL;
int down_client = 0;
int i = 0;
+ gf_boolean_t heard_from_all_children = _gf_false;
if (!this)
return 0;
@@ -3975,30 +4339,34 @@ notify (xlator_t *this, int32_t event, void *data, ...)
switch (event)
{
case GF_EVENT_CHILD_UP:
- case GF_EVENT_CHILD_CONNECTING:
{
/* get an index number to set */
for (i = 0; i < priv->child_count; i++) {
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
if (data == FIRST_CHILD (this))
priv->first_child_down = 0;
- if (!priv->nodes_down)
- default_notify (this, event, data);
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
break;
+ case GF_EVENT_CHILD_CONNECTING:
+ {
+ // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing
+ goto out;
+ }
case GF_EVENT_CHILD_DOWN:
{
/* get an index number to set */
@@ -4006,20 +4374,19 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
-
if (data == FIRST_CHILD (this))
priv->first_child_down = 1;
- if (priv->nodes_down)
- default_notify (this, event, data);
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
@@ -4029,79 +4396,252 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
/* */
default_notify (this, event, data);
+ goto out;
}
break;
}
+ // Consider child as down if it's last_event is not CHILD_UP
+ for (i = 0, down_client = 0; i < priv->child_count; i++)
+ if (priv->last_event[i] != GF_EVENT_CHILD_UP)
+ down_client++;
+
+ LOCK (&priv->lock);
+ {
+ priv->nodes_down = down_client;
+ }
+ UNLOCK (&priv->lock);
+
+ heard_from_all_children = _gf_true;
+ for (i = 0; i < priv->child_count; i++)
+ if (!priv->last_event[i])
+ heard_from_all_children = _gf_false;
+
+ if (heard_from_all_children)
+ default_notify (this, event, data);
+out:
return 0;
}
int
-set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+stripe_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- if (!this || !priv || !data)
- goto out;
+ int ret = -1;
+ int call_cnt = 0;
+ stripe_local_t *local = NULL;
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r (data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = gf_strdup (stripe_str);
- stripe_opt = CALLOC (1, sizeof (struct stripe_options));
- if (!stripe_opt) {
- GF_FREE (dup_str);
- goto out;
- }
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+ return ret;
+ }
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- num = strtok_r (NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
- }
- if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"", num);
- goto out;
- }
+ local = frame->local;
- if (stripe_opt->block_size < 512) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
- "%s. Should be atleast 512 bytes", num);
- goto out;
+ LOCK (&frame->lock);
+ {
+ call_cnt = --local->wind_count;
+
+ /**
+ * We overwrite ->op_* values here for subsequent faliure
+ * conditions, hence we propogate the last errno down the
+ * stack.
+ */
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unlock;
}
- if (stripe_opt->block_size % 512) {
- gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
- " be a multiple of 512 bytes", num);
- goto out;
+ }
+
+ unlock:
+ UNLOCK (&frame->lock);
+
+ if (!call_cnt) {
+ STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_BD_XLATOR
+int
+stripe_is_bd (dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *is_bd = data;
+
+ if (data == NULL)
+ return 0;
+
+ if (XATTR_IS_BD (key))
+ *is_bd = _gf_true;
+
+ return 0;
+}
+
+inline gf_boolean_t
+stripe_setxattr_is_bd (dict_t *dict)
+{
+ gf_boolean_t is_bd = _gf_false;
+
+ if (dict == NULL)
+ goto out;
+
+ dict_foreach (dict, stripe_is_bd, &is_bd);
+out:
+ return is_bd;
+}
+#else
+#define stripe_setxattr_is_bd(dict) _gf_false
+#endif
+
+int
+stripe_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int i = 0;
+ gf_boolean_t is_bd = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+ local->wind_count = priv->child_count;
+ local->op_ret = local->op_errno = 0;
+
+ is_bd = stripe_setxattr_is_bd (dict);
+
+ /**
+ * Set xattrs for directories on all subvolumes. Additionally
+ * this power is only given to a special client. Bd xlator
+ * also needs xattrs for regular files (ie LVs)
+ */
+ if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
+ IA_ISDIR (loc->inode->ia_type)) || is_bd) {
+ for (i = 0; i < priv->child_count; i++, trav = trav->next) {
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ trav->xlator, trav->xlator->fops->setxattr,
+ loc, dict, flags, xdata);
}
+ } else {
+ local->wind_count = 1;
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ }
- memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %"PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
- if (!priv->pattern) {
- priv->pattern = stripe_opt;
- } else {
- temp_stripeopt = priv->pattern;
- while (temp_stripeopt->next)
- temp_stripeopt = temp_stripeopt->next;
- temp_stripeopt->next = stripe_opt;
+int
+stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+stripe_is_special_key (dict_t *this,
+ char *key,
+ data_t *value,
+ void *data)
+{
+ gf_boolean_t *is_special = NULL;
+
+ if (data == NULL) {
+ goto out;
+ }
+
+ is_special = data;
+
+ if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key))
+ *is_special = _gf_true;
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ stripe_local_t *local = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->wind_count;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+ return 0;
+}
+
+int
+stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int ret = -1;
+ stripe_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ goto out;
+ }
+
+ frame->local = local;
+
+ local->wind_count = priv->child_count;
+
+ trav = this->children;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_fsetxattr_everyone_cbk,
+ trav->xlator, trav->xlator->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ trav = trav->next;
}
ret = 0;
@@ -4109,80 +4649,220 @@ out:
return ret;
}
-int32_t
-stripe_iatt_merge (struct iatt *from, struct iatt *to)
+inline gf_boolean_t
+stripe_fsetxattr_is_special (dict_t *dict)
{
- if (to->ia_size < from->ia_size)
- to->ia_size = from->ia_size;
- if (to->ia_mtime < from->ia_mtime)
- to->ia_mtime = from->ia_mtime;
- if (to->ia_ctime < from->ia_ctime)
- to->ia_ctime = from->ia_ctime;
- if (to->ia_atime < from->ia_atime)
- to->ia_atime = from->ia_atime;
- return 0;
+ gf_boolean_t is_spl = _gf_false;
+
+ if (dict == NULL) {
+ goto out;
+ }
+
+ dict_foreach (dict, stripe_is_special_key, &is_spl);
+
+out:
+ return is_spl;
}
-int32_t
-stripe_readdirp_entry_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+int
+stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
- stripe_local_t *local = NULL;
- int32_t done = 0;
+ int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
+ gf_boolean_t is_spl = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ is_spl = stripe_fsetxattr_is_special (dict);
+ if (is_spl) {
+ ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict,
+ flags, xdata);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (!this || !frame || !frame->local || !cookie) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
goto out;
}
- entry = cookie;
+
+ STACK_WIND (frame, stripe_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+out:
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (this, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ STACK_WIND (frame, stripe_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ STACK_WIND (frame, stripe_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+ err:
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *parent)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ stripe_local_t *main_local = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ int done = 0;
+
local = frame->local;
+ prev = cookie;
+
+ entry = local->dirent;
+
+ main_frame = local->orig_frame;
+ main_local = main_frame->local;
LOCK (&frame->lock);
{
- local->wind_count--;
- if (!local->wind_count)
+ local->call_count--;
+ if (!local->call_count)
done = 1;
if (op_ret == -1) {
local->op_errno = op_errno;
local->op_ret = op_ret;
goto unlock;
}
- stripe_iatt_merge (buf, &entry->d_stat);
+
+ if (stripe_ctx_handle(this, prev, local, xattr))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict.");
+
+ correct_file_size(stbuf, local->fctx, prev);
+
+ stripe_iatt_merge (stbuf, &entry->d_stat);
+ local->stbuf_blocks += stbuf->ia_blocks;
}
unlock:
UNLOCK(&frame->lock);
if (done) {
- frame->local = NULL;
- STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
- local->op_errno, &local->entries);
+ inode_ctx_put (entry->inode, this,
+ (uint64_t) (long)local->fctx);
- gf_dirent_free (&local->entries);
+ done = 0;
+ LOCK (&main_frame->lock);
+ {
+ main_local->wind_count--;
+ if (!main_local->wind_count)
+ done = 1;
+ if (local->op_ret == -1) {
+ main_local->op_errno = local->op_errno;
+ main_local->op_ret = local->op_ret;
+ }
+ entry->d_stat.ia_blocks = local->stbuf_blocks;
+ }
+ UNLOCK (&main_frame->lock);
+ if (done) {
+ main_frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, main_frame,
+ main_local->op_ret,
+ main_local->op_errno,
+ &main_local->entries, NULL);
+ gf_dirent_free (&main_local->entries);
+ stripe_local_wipe (main_local);
+ mem_put (main_local);
+ }
+ frame->local = NULL;
stripe_local_wipe (local);
- GF_FREE (local);
+ mem_put (local);
+ STRIPE_STACK_DESTROY (frame);
}
-out:
- return 0;
+ return 0;
}
int32_t
stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries)
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *orig_entries, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
gf_dirent_t *local_entry = NULL;
- int32_t ret = -1;
gf_dirent_t *tmp_entry = NULL;
xlator_list_t *trav = NULL;
loc_t loc = {0, };
- inode_t *inode = NULL;
- char *path;
int32_t count = 0;
stripe_private_t *priv = NULL;
int32_t subvols = 0;
+ dict_t *xattrs = NULL;
+ call_frame_t *local_frame = NULL;
+ stripe_local_t *local_ent = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -4208,8 +4888,9 @@ stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = op_ret;
list_splice_init (&orig_entries->list,
&local->entries.list);
- local->wind_count = op_ret * subvols;
+ local->wind_count = op_ret;
}
+
}
unlock:
UNLOCK (&frame->lock);
@@ -4217,8 +4898,10 @@ unlock:
if (op_ret == -1)
goto out;
+ xattrs = dict_new ();
+ if (xattrs)
+ (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);
count = op_ret;
- ret = 0;
list_for_each_entry_safe (local_entry, tmp_entry,
(&local->entries.list), list) {
@@ -4227,68 +4910,73 @@ unlock:
if (!IA_ISREG (local_entry->d_stat.ia_type)) {
LOCK (&frame->lock);
{
- local->wind_count -= subvols;
+ local->wind_count--;
count = local->wind_count;
}
UNLOCK (&frame->lock);
continue;
}
- inode = inode_new (local->fd->inode->table);
- if (!inode)
+ local_frame = copy_frame (frame);
+
+ if (!local_frame) {
+ op_errno = ENOMEM;
+ op_ret = -1;
goto out;
+ }
- loc.inode = inode;
- loc.parent = local->fd->inode;
- ret = inode_path (local->fd->inode, local_entry->d_name, &path);
- if (ret != -1) {
- loc.path = path;
- } else if (inode) {
- ret = inode_path (inode, NULL, &path);
- if (ret != -1) {
- loc.path = path;
- } else {
- goto out;
- }
+ local_ent = mem_get0 (this->local_pool);
+ if (!local_ent) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto out;
}
- loc.name = strrchr (loc.path, '/');
- loc.name++;
+ loc.inode = inode_ref (local_entry->inode);
+
uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid);
+ local_ent->orig_frame = frame;
+
+ local_ent->call_count = subvols;
+
+ local_ent->dirent = local_entry;
+
+ local_frame->local = local_ent;
+
trav = this->children;
while (trav) {
- STACK_WIND_COOKIE (frame, stripe_readdirp_entry_stat_cbk,
- local_entry, trav->xlator,
- trav->xlator->fops->stat, &loc);
+ STACK_WIND (local_frame, stripe_readdirp_lookup_cbk,
+ trav->xlator, trav->xlator->fops->lookup,
+ &loc, xattrs);
trav = trav->next;
}
- inode_unref (loc.inode);
+ loc_wipe (&loc);
}
out:
if (!count) {
/* all entries are directories */
frame->local = NULL;
STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
- local->op_errno, &local->entries);
+ local->op_errno, &local->entries, NULL);
gf_dirent_free (&local->entries);
stripe_local_wipe (local);
- GF_FREE (local);
+ mem_put (local);
}
-
+ if (xattrs)
+ dict_unref (xattrs);
return 0;
}
int32_t
stripe_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
int op_errno = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -4302,8 +4990,7 @@ stripe_readdirp (call_frame_t *frame, xlator_t *this,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -4323,15 +5010,16 @@ stripe_readdirp (call_frame_t *frame, xlator_t *this,
goto err;
STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator,
- trav->xlator->fops->readdirp, fd, size, off);
+ trav->xlator->fops->readdirp, fd, size, off, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -4352,21 +5040,86 @@ out:
return ret;
}
+static int
+clear_pattern_list (stripe_private_t *priv)
+{
+ struct stripe_options *prev = NULL;
+ struct stripe_options *trav = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+
+ trav = priv->pattern;
+ priv->pattern = NULL;
+ while (trav) {
+ prev = trav;
+ trav = trav->next;
+ GF_FREE (prev);
+ }
+
+ ret = 0;
+ out:
+ return ret;
+
+
+}
+
int
reconfigure (xlator_t *this, dict_t *options)
{
- stripe_private_t *priv = NULL;
- int ret = -1;
+ stripe_private_t *priv = NULL;
+ data_t *data = NULL;
+ int ret = -1;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ GF_ASSERT (this->private);
- priv = this->private;
+ priv = this->private;
- GF_OPTION_RECONF ("block-size", priv->block_size, options, size, out);
ret = 0;
-out:
- return ret;
+ LOCK (&priv->lock);
+ {
+ ret = clear_pattern_list (priv);
+ if (ret)
+ goto unlock;
+
+ data = dict_get (options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
+ } else {
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ }
+
+ GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool,
+ unlock);
+ }
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
+
+ ret = 0;
+ out:
+ return ret;
}
@@ -4379,6 +5132,7 @@ int32_t
init (xlator_t *this)
{
stripe_private_t *priv = NULL;
+ volume_option_t *opt = NULL;
xlator_list_t *trav = NULL;
data_t *data = NULL;
int32_t count = 0;
@@ -4422,9 +5176,9 @@ init (xlator_t *this)
if (!priv->xl_array)
goto out;
- priv->state = GF_CALLOC (count, sizeof (int8_t),
- gf_stripe_mt_int8_t);
- if (!priv->state)
+ priv->last_event = GF_CALLOC (count, sizeof (int),
+ gf_stripe_mt_int32_t);
+ if (!priv->last_event)
goto out;
priv->child_count = count;
@@ -4444,30 +5198,56 @@ init (xlator_t *this)
goto out;
}
-
- GF_OPTION_INIT ("block-size", priv->block_size, size, out);
-
- /* option stripe-pattern *avi:1GB,*pdf:4096 */
- data = dict_get (this->options, "block-size");
- if (data) {
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret)
- goto out;
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ /* option stripe-pattern *avi:1GB,*pdf:16K */
+ data = dict_get (this->options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
+ }
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out);
-
/* notify related */
priv->nodes_down = priv->child_count;
- this->private = priv;
+ GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
+
+ this->local_pool = mem_pool_new (stripe_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = priv;
ret = 0;
out:
if (ret) {
if (priv) {
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
+ GF_FREE (priv->xl_array);
GF_FREE (priv);
}
}
@@ -4491,15 +5271,15 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
this->private = NULL;
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
+ GF_FREE (priv->xl_array);
trav = priv->pattern;
while (trav) {
prev = trav;
trav = trav->next;
- FREE (prev);
+ GF_FREE (prev);
}
+ GF_FREE (priv->last_event);
LOCK_DESTROY (&priv->lock);
GF_FREE (priv);
}
@@ -4510,17 +5290,50 @@ out:
int32_t
stripe_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
+int
+stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+
+ char size_key[256] = {0,};
+ char index_key[256] = {0,};
+ char count_key[256] = {0,};
+ char coalesce_key[256] = {0,};
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+
+ if (!xattr || (op_ret == -1))
+ goto out;
+
+ sprintf (size_key, "trusted.%s.stripe-size", this->name);
+ sprintf (count_key, "trusted.%s.stripe-count", this->name);
+ sprintf (index_key, "trusted.%s.stripe-index", this->name);
+ sprintf (coalesce_key, "trusted.%s.stripe-coalesce", this->name);
+
+ dict_del (xattr, size_key);
+ dict_del (xattr, count_key);
+ dict_del (xattr, index_key);
+ dict_del (xattr, coalesce_key);
+
+out:
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+
+}
int
stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
int call_cnt = 0;
stripe_local_t *local = NULL;
@@ -4550,92 +5363,39 @@ stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
if (!call_cnt) {
STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
- local->xattr);
+ local->xattr, xdata);
}
return 0;
}
int32_t
-stripe_pathinfo_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
+stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- int32_t i = 0;
- int32_t ret = -1;
- int32_t len = 0;
- char *sbuf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!buffer || !local || !local->xattr_list)
- goto out;
-
- sbuf = buffer;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->pathinfo_len;
-
- if (len && xattr && xattr->pathinfo) {
- memcpy (buffer, xattr->pathinfo, len);
- buffer += len;
- *buffer++ = ' ';
- }
- }
-
- *--buffer = '\0';
- if (total)
- *total = buffer - sbuf;
- ret = 0;
-
- out:
- return ret;
-}
-
-int32_t
-stripe_free_pathinfo_str (stripe_local_t *local)
-{
- int32_t i = 0;
- int32_t ret = -1;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!local || !local->xattr_list)
- goto out;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
-
- if (xattr && xattr->pathinfo)
- GF_FREE (xattr->pathinfo);
- }
-
- ret = 0;
- out:
- return ret;
-}
-
-int32_t
-stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict) {
stripe_local_t *local = NULL;
int32_t callcnt = 0;
int32_t ret = -1;
long cky = 0;
- char *pathinfo = NULL;
- char *pathinfo_serz = NULL;
- int32_t padding = 0;
- int32_t tlen = 0;
- char stripe_size_str[20] = {0,};
+ void *xattr_val = NULL;
+ void *xattr_serz = NULL;
stripe_xattr_sort_t *xattr = NULL;
dict_t *stripe_xattr = NULL;
if (!frame || !frame->local || !this) {
- gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
return ret;
}
local = frame->local;
cky = (long) cookie;
+ if (local->xsel[0] == '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk");
+ return ret;
+ }
+
LOCK (&frame->lock);
{
callcnt = --local->wind_count;
@@ -4644,23 +5404,26 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
goto out;
if (!local->xattr_list)
- local->xattr_list = (stripe_xattr_sort_t *) GF_CALLOC (local->nallocs,
- sizeof (stripe_xattr_sort_t),
- gf_stripe_mt_xattr_sort_t);
+ local->xattr_list = (stripe_xattr_sort_t *)
+ GF_CALLOC (local->nallocs,
+ sizeof (stripe_xattr_sort_t),
+ gf_stripe_mt_xattr_sort_t);
if (local->xattr_list) {
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
- if (ret)
- goto out;
-
xattr = local->xattr_list + (int32_t) cky;
- pathinfo = gf_strdup (pathinfo);
+ ret = dict_get_ptr_and_len (dict, local->xsel,
+ &xattr_val,
+ &xattr->xattr_len);
+ if (xattr->xattr_len == 0)
+ goto out;
+
xattr->pos = cky;
- xattr->pathinfo = pathinfo;
- xattr->pathinfo_len = strlen (pathinfo);
+ xattr->xattr_value = gf_memdup (xattr_val,
+ xattr->xattr_len);
- local->xattr_total_len += strlen (pathinfo) + 1;
+ if (xattr->xattr_value != NULL)
+ local->xattr_total_len += xattr->xattr_len + 1;
}
}
out:
@@ -4674,41 +5437,36 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
if (!stripe_xattr)
goto unwind;
- snprintf (stripe_size_str, 20, "%ld", local->stripe_size);
-
- /* extra bytes for decorations (brackets and <>'s) */
- padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
- + strlen (stripe_size_str) + 7;
- local->xattr_total_len += (padding + 2);
-
- pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
- gf_common_mt_char);
- if (!pathinfo_serz)
- goto unwind;
-
- /* xlator info */
- sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ", this->name, stripe_size_str);
-
- ret = stripe_pathinfo_aggregate (pathinfo_serz + padding, local, &tlen);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot aggregate pathinfo list");
+ /* select filler based on ->xsel */
+ if (XATTR_IS_PATHINFO (local->xsel))
+ ret = stripe_fill_pathinfo_xattr (this, local,
+ (char **)&xattr_serz);
+ else if (XATTR_IS_LOCKINFO (local->xsel)) {
+ ret = stripe_fill_lockinfo_xattr (this, local,
+ &xattr_serz);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown xattr in xattr request");
goto unwind;
}
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
-
- ret = dict_set_dynstr (stripe_xattr, GF_XATTR_PATHINFO_KEY, pathinfo_serz);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo key in dict");
+ if (!ret) {
+ ret = dict_set_dynptr (stripe_xattr, local->xsel,
+ xattr_serz,
+ local->xattr_total_len);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Can't set %s key in dict",
+ local->xsel);
+ }
unwind:
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, stripe_xattr);
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
+ stripe_xattr, NULL);
- ret = stripe_free_pathinfo_str (local);
+ ret = stripe_free_xattr_str (local);
- if (local->xattr_list)
- GF_FREE (local->xattr_list);
+ GF_FREE (local->xattr_list);
if (stripe_xattr)
dict_unref (stripe_xattr);
@@ -4719,7 +5477,7 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
int32_t
stripe_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -4739,8 +5497,7 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -4751,7 +5508,7 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (name && (strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
sub_volumes = alloca ( priv->child_count *
@@ -4766,7 +5523,8 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (cluster_getmarkerattr (frame, this, loc, name,
local, stripe_getxattr_unwind,
sub_volumes, priv->child_count,
- MARKER_UUID_TYPE, priv->vol_uuid)) {
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ priv->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
@@ -4782,28 +5540,39 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
trav = trav->next) {
STACK_WIND (frame, stripe_getxattr_cbk,
trav->xlator, trav->xlator->fops->getxattr,
- loc, name);
+ loc, name, xdata);
}
return 0;
}
- if (name && (strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0)) {
- ret = inode_ctx_get (loc->inode, this,
- (uint64_t *) &local->stripe_size);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "stripe size unavailable from inode ctx - relying"
- " on pathinfo could lead to wrong results");
+ if (name &&
+ ((strncmp (name, GF_XATTR_PATHINFO_KEY,
+ strlen (GF_XATTR_PATHINFO_KEY)) == 0))) {
+ if (IA_ISREG (loc->inode->ia_type)) {
+ ret = inode_ctx_get (loc->inode, this,
+ (uint64_t *) &local->fctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "stripe size unavailable from fctx"
+ " relying on pathinfo could lead to"
+ " wrong results");
+ }
+
local->nallocs = local->wind_count = priv->child_count;
+ (void) strncpy (local->xsel, name, strlen (name));
+ /**
+ * for xattrs that need info from all childs, fill ->xsel
+ * as above and call the filler function in cbk based on
+ * it
+ */
for (i = 0, trav = this->children; i < priv->child_count; i++,
trav = trav->next) {
- STACK_WIND_COOKIE (frame, stripe_getxattr_pathinfo_cbk,
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
(void *) (long) i, trav->xlator,
trav->xlator->fops->getxattr,
- loc, name);
+ loc, name, xdata);
}
return 0;
@@ -4811,42 +5580,125 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (name &&(*priv->vol_uuid)) {
if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
- local->marker.call_count = priv->child_count;
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
- sub_volumes = alloca ( priv->child_count *
- sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
+ if (!IA_FILE_OR_DIR (loc->inode->ia_type))
+ local->marker.call_count = 1;
+ else
+ local->marker.call_count = priv->child_count;
+ sub_volumes = alloca (local->marker.call_count *
+ sizeof (xlator_t *));
+
+ for (i = 0, trav = this->children;
+ i < local->marker.call_count;
+ i++, trav = trav->next) {
*(sub_volumes + i) = trav->xlator;
}
if (cluster_getmarkerattr (frame, this, loc, name,
- local, stripe_getxattr_unwind,
+ local,
+ stripe_getxattr_unwind,
sub_volumes,
- priv->child_count,
+ local->marker.call_count,
MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
priv->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
+
return 0;
}
}
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
+inline gf_boolean_t
+stripe_is_special_xattr (const char *name)
+{
+ gf_boolean_t is_spl = _gf_false;
+
+ if (!name) {
+ goto out;
+ }
+
+ if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))
+ || !strncmp (name, GF_XATTR_PATHINFO_KEY,
+ strlen (GF_XATTR_PATHINFO_KEY)))
+ is_spl = _gf_true;
+out:
+ return is_spl;
+}
+
+int32_t
+stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t ret = -1, op_errno = 0;
+ int i = 0;
+ xlator_list_t *trav = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->op_ret = -1;
+ frame->local = local;
+
+ strncpy (local->xsel, name, strlen (name));
+ local->nallocs = local->wind_count = priv->child_count;
+
+ for (i = 0, trav = this->children; i < priv->child_count; i++,
+ trav = trav->next) {
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+ (void *) (long) i, trav->xlator,
+ trav->xlator->fops->fgetxattr,
+ fd, name, xdata);
+ }
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, NULL);
+ return ret;
+}
+
+int32_t
+stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ if (stripe_is_special_xattr (name)) {
+ stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata);
+ goto out;
+ }
+
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+
+out:
+ return 0;
+}
+
+
+
int32_t
stripe_priv_dump (xlator_t *this)
{
@@ -4895,32 +5747,39 @@ out:
}
struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .truncate = stripe_truncate,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .setattr = stripe_setattr,
- .fsetattr = stripe_fsetattr,
- .lookup = stripe_lookup,
- .mknod = stripe_mknod,
-
- .getxattr = stripe_getxattr,
- .readdirp = stripe_readdirp,
+ .stat = stripe_stat,
+ .unlink = stripe_unlink,
+ .rename = stripe_rename,
+ .link = stripe_link,
+ .truncate = stripe_truncate,
+ .create = stripe_create,
+ .open = stripe_open,
+ .readv = stripe_readv,
+ .writev = stripe_writev,
+ .statfs = stripe_statfs,
+ .flush = stripe_flush,
+ .fsync = stripe_fsync,
+ .ftruncate = stripe_ftruncate,
+ .fstat = stripe_fstat,
+ .mkdir = stripe_mkdir,
+ .rmdir = stripe_rmdir,
+ .lk = stripe_lk,
+ .opendir = stripe_opendir,
+ .fsyncdir = stripe_fsyncdir,
+ .setattr = stripe_setattr,
+ .fsetattr = stripe_fsetattr,
+ .lookup = stripe_lookup,
+ .mknod = stripe_mknod,
+ .setxattr = stripe_setxattr,
+ .fsetxattr = stripe_fsetxattr,
+ .getxattr = stripe_getxattr,
+ .fgetxattr = stripe_fgetxattr,
+ .removexattr = stripe_removexattr,
+ .fremovexattr = stripe_fremovexattr,
+ .readdirp = stripe_readdirp,
+ .fallocate = stripe_fallocate,
+ .discard = stripe_discard,
+ .zerofill = stripe_zerofill,
};
struct xlator_cbks cbks = {
@@ -4934,8 +5793,9 @@ struct xlator_dumpops dumpops = {
struct volume_options options[] = {
{ .key = {"block-size"},
- .type = GF_OPTION_TYPE_ANY,
+ .type = GF_OPTION_TYPE_SIZE_LIST,
.default_value = "128KB",
+ .min = STRIPE_MIN_BLOCK_SIZE,
.description = "Size of the stripe unit that would be read "
"from or written to the striped servers."
},
@@ -4943,5 +5803,12 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
.default_value = "true"
},
+ { .key = {"coalesce"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Enable/Disable coalesce mode to flatten striped "
+ "files as stored on the server (i.e., eliminate holes "
+ "caused by the traditional format)."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
index 3ab67d621..5673d18f3 100644
--- a/xlators/cluster/stripe/src/stripe.h
+++ b/xlators/cluster/stripe/src/stripe.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -38,36 +29,53 @@
#include <signal.h>
#define STRIPE_PATHINFO_HEADER "STRIPE:"
-
+#define STRIPE_MIN_BLOCK_SIZE (16*GF_UNIT_KB)
#define STRIPE_STACK_UNWIND(fop, frame, params ...) do { \
stripe_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ stripe_local_wipe(__local); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define STRIPE_STACK_DESTROY(frame) do { \
+ stripe_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
if (__local) { \
- stripe_local_wipe(__local); \
- GF_FREE (__local); \
+ stripe_local_wipe (__local); \
+ mem_put (__local); \
} \
} while (0)
-#define STRIPE_STACK_DESTROY(frame) do { \
- stripe_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- if (__local) { \
- stripe_local_wipe (__local); \
- GF_FREE (__local); \
- } \
- } while (0)
+#define STRIPE_VALIDATE_FCTX(fctx, label) do { \
+ int idx = 0; \
+ if (!fctx) { \
+ op_errno = EINVAL; \
+ goto label; \
+ } \
+ for (idx = 0; idx < fctx->stripe_count; idx++) { \
+ if (!fctx->xl_array[idx]) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "fctx->xl_array[%d] is NULL", \
+ idx); \
+ op_errno = ESTALE; \
+ goto label; \
+ } \
+ } \
+ } while (0)
typedef struct stripe_xattr_sort {
- int32_t pos;
- int32_t pathinfo_len;
- char *pathinfo;
+ int pos;
+ int xattr_len;
+ char *xattr_value;
} stripe_xattr_sort_t;
/**
@@ -90,16 +98,17 @@ struct stripe_private {
gf_lock_t lock;
uint8_t nodes_down;
int8_t first_child_down;
+ int *last_event;
int8_t child_count;
- int8_t *state; /* Current state of child node */
gf_boolean_t xattr_supported; /* default yes */
+ gf_boolean_t coalesce;
char vol_uuid[UUID_SIZE + 1];
};
/**
- * Used to keep info about the replies received from fops->readv calls
+ * Used to keep info about the replies received from readv/writev calls
*/
-struct readv_replies {
+struct stripe_replies {
struct iovec *vector;
int32_t count; //count of vector
int32_t op_ret; //op_ret of readv
@@ -111,6 +120,7 @@ struct readv_replies {
typedef struct _stripe_fd_ctx {
off_t stripe_size;
int stripe_count;
+ int stripe_coalesce;
int static_array;
xlator_t **xl_array;
} stripe_fd_ctx_t;
@@ -146,7 +156,7 @@ struct stripe_local {
blkcnt_t preparent_blocks;
blkcnt_t postparent_blocks;
- struct readv_replies *replies;
+ struct stripe_replies *replies;
struct statvfs statvfs_buf;
dir_entry_t *entry;
@@ -173,11 +183,12 @@ struct stripe_local {
mode_t mode;
dev_t rdev;
/* For File I/O fops */
- dict_t *dict;
+ dict_t *xdata;
stripe_xattr_sort_t *xattr_list;
int32_t xattr_total_len;
int32_t nallocs;
+ char xsel[256];
struct marker_str marker;
@@ -194,12 +205,84 @@ struct stripe_local {
void *value;
struct iobref *iobref;
gf_dirent_t entries;
+ gf_dirent_t *dirent;
dict_t *xattr;
uuid_t ia_gfid;
+
+ int xflag;
+ mode_t umask;
};
typedef struct stripe_local stripe_local_t;
typedef struct stripe_private stripe_private_t;
+/*
+ * Determine the stripe index of a particular frame based on the translator.
+ */
+static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx,
+ call_frame_t *prev)
+{
+ int32_t i, idx = -1;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (fctx->xl_array[i] == prev->this) {
+ idx = i;
+ break;
+ }
+ }
+
+ return idx;
+}
+
+static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src,
+ int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ dst[i] = src[i];
+}
+
+void stripe_local_wipe (stripe_local_t *local);
+int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,
+ stripe_local_t *local, dict_t *dict);
+void stripe_aggregate_xattr (dict_t *dst, dict_t *src);
+int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,
+ uint64_t stripe_size, uint32_t stripe_count,
+ uint32_t stripe_index,
+ uint32_t stripe_coalesce);
+int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);
+int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);
+int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to);
+int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz);
+int32_t stripe_free_xattr_str (stripe_local_t *local);
+int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,
+ int32_t *total);
+off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
+off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index);
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz);
+
+/*
+ * Adjust the size attribute for files if coalesce is enabled.
+ */
+static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx,
+ call_frame_t *prev)
+{
+ int index;
+
+ if (!IA_ISREG(buf->ia_type))
+ return;
+
+ if (!fctx || !fctx->stripe_coalesce)
+ return;
+
+ index = stripe_get_frame_index(fctx, prev);
+ buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
+ fctx->stripe_count, index);
+}
#endif /* _STRIPE_H_ */
diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am
deleted file mode 100644
index 2a1fe8372..000000000
--- a/xlators/cluster/unify/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-
-xlator_LTLIBRARIES = unify.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/legacy/cluster
-
-unify_la_LDFLAGS = -module -avoidversion
-
-unify_la_SOURCES = unify.c unify-self-heal.c
-unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = unify.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/unify/src/unify-mem-types.h b/xlators/cluster/unify/src/unify-mem-types.h
deleted file mode 100644
index 13c9cc1f7..000000000
--- a/xlators/cluster/unify/src/unify-mem-types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __UNIFY_MEM_TYPES_H__
-#define __UNIFY_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_unify_mem_types_ {
- gf_unify_mt_char = gf_common_mt_end + 1,
- gf_unify_mt_int16_t,
- gf_unify_mt_xlator_t,
- gf_unify_mt_unify_private_t,
- gf_unify_mt_xlator_list_t,
- gf_unify_mt_dir_entry_t,
- gf_unify_mt_off_t,
- gf_unify_mt_int,
- gf_unify_mt_unify_self_heal_struct,
- gf_unify_mt_unify_local_t,
- gf_unify_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c
deleted file mode 100644
index f99e4c7c3..000000000
--- a/xlators/cluster/unify/src/unify-self-heal.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * unify-self-heal.c :
- * This file implements few functions which enables 'unify' translator
- * to be consistent in its behaviour when
- * > a node fails,
- * > a node gets added,
- * > a failed node comes back
- * > a new namespace server is added (ie, an fresh namespace server).
- *
- * This functionality of 'unify' will enable glusterfs to support storage
- * system failure, and maintain consistancy. This works both ways, ie, when
- * an entry (either file or directory) is found on namespace server, and not
- * on storage nodes, its created in storage nodes and vica-versa.
- *
- * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()'
- *
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "common-utils.h"
-
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
-
- if (local->sh_struct) {
- if (local->sh_struct->offset_list)
- GF_FREE (local->sh_struct->offset_list);
-
- if (local->sh_struct->entry_list)
- GF_FREE (local->sh_struct->entry_list);
-
- if (local->sh_struct->count_list)
- GF_FREE (local->sh_struct->count_list);
-
- GF_FREE (local->sh_struct);
- }
-
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-int32_t
-unify_sh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents on
- * storagenodes is still pending.
- */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more entries
- to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so, it
- can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_sh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_sh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
- }
- UNLOCK (&frame->lock);
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_sh_getdents_cbk -
- */
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_sh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_sh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_sh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_WARNING, "failed");
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- * track of offset sent to each node during
- * STACK_WIND.
- */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
-
- /* did stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
- /* Only 'self-heal' failed, lookup() was successful. */
- local->op_ret = 0;
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
- &local->stbuf, local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * gf_sh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_sh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in
- only one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be
- same accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at directory level */
- local->call_count = 0;
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
-
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->stbuf,
- local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/* Foreground self-heal part over */
-
-/* Background self-heal part */
-
-int32_t
-unify_bgsh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents
- on storagenodes is still pending. */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more
- entries to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- fd_unref (local->fd);
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so,
- it can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_bgsh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_bgsh_getdents_cbk -
- */
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_bgsh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
-
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_bgsh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_bgsh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int32_t callcnt = 0;
- int16_t index = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
- callcnt = local->call_count;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- track of offset sent to each node during
- STACK_WIND. */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
- /* did a stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- fd_unref (local->fd);
-
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/**
- * gf_bgsh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_bgsh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in only
- one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be same
- accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at the directory level */
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
-
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/* Background self-heal part over */
-
-
-
-
-/**
- * zr_unify_self_heal -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local)
-{
- unify_private_t *priv = this->private;
- call_frame_t *bg_frame = NULL;
- unify_local_t *bg_local = NULL;
- inode_t *tmp_inode = NULL;
- dict_t *tmp_dict = NULL;
- int16_t index = 0;
-
- if (local->inode_generation < priv->inode_generation) {
- /* Any self heal will be done at the directory level */
- /* Update the inode's generation to the current generation
- value. */
- local->inode_generation = priv->inode_generation;
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->inode_generation);
-
- if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) {
- local->op_ret = 0;
- local->failed = 0;
- local->call_count = priv->child_count + 1;
- local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &local->loc1,
- 0);
- }
-
- /* Self-heal in foreground, hence no need
- to UNWIND here */
- return 0;
- }
-
- /* Self Heal done in background */
- bg_frame = copy_frame (frame);
- INIT_LOCAL (bg_frame, bg_local);
- loc_copy (&bg_local->loc1, &local->loc1);
- bg_local->op_ret = 0;
- bg_local->failed = 0;
- bg_local->call_count = priv->child_count + 1;
- bg_local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (bg_frame,
- unify_bgsh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &bg_local->loc1,
- 0);
- }
- }
-
- /* generation number matches, self heal already done or
- * self heal done in background: just do STACK_UNWIND
- */
- tmp_inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- tmp_inode,
- &local->stbuf,
- local->dict,
- &local->oldpostparent);
-
- if (tmp_dict)
- dict_unref (tmp_dict);
-
- return 0;
-}
-
diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c
deleted file mode 100644
index 6dc93083d..000000000
--- a/xlators/cluster/unify/src/unify.c
+++ /dev/null
@@ -1,4589 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * xlators/cluster/unify:
- * - This xlator is one of the main translator in GlusterFS, which
- * actually does the clustering work of the file system. One need to
- * understand that, unify assumes file to be existing in only one of
- * the child node, and directories to be present on all the nodes.
- *
- * NOTE:
- * Now, unify has support for global namespace, which is used to keep a
- * global view of fs's namespace tree. The stat for directories are taken
- * just from the namespace, where as for files, just 'ia_ino' is taken from
- * Namespace node, and other stat info is taken from the actual storage node.
- * Also Namespace node helps to keep consistant inode for files across
- * glusterfs (re-)mounts.
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include <signal.h>
-#include <libgen.h>
-#include "compat-errno.h"
-#include "compat.h"
-
-#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \
- if (!(_loc && _loc->inode)) { \
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-
-#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \
- if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \
- if (!_fd) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-
-
-/*
- * unify_normalize_stats -
- */
-void
-unify_normalize_stats (struct statvfs *buf,
- unsigned long bsize,
- unsigned long frsize)
-{
- double factor;
-
- if (buf->f_bsize != bsize) {
- factor = ((double) buf->f_bsize) / bsize;
- buf->f_bsize = bsize;
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- }
-}
-
-
-xlator_t *
-unify_loc_subvol (loc_t *loc, xlator_t *this)
-{
- unify_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int16_t *list = NULL;
- long index = 0;
- xlator_t *subvol_i = NULL;
- int ret = 0;
- uint64_t tmp_list = 0;
-
- priv = this->private;
- subvol = NS (this);
-
- if (!IA_ISDIR (loc->inode->ia_type)) {
- ret = inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
- if (!list)
- goto out;
-
- for (index = 0; list[index] != -1; index++) {
- subvol_i = priv->xl_array[list[index]];
- if (subvol_i != NS (this)) {
- subvol = subvol_i;
- break;
- }
- }
- }
-out:
- return subvol;
-}
-
-
-
-/**
- * unify_statfs_cbk -
- */
-int32_t
-unify_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *stbuf)
-{
- int32_t callcnt = 0;
- struct statvfs *dict_buf = NULL;
- unsigned long bsize;
- unsigned long frsize;
- unify_local_t *local = (unify_local_t *)frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- /* when a call is successfull, add it to local->dict */
- dict_buf = &local->statvfs_buf;
-
- if (dict_buf->f_bsize != 0) {
- bsize = max (dict_buf->f_bsize,
- stbuf->f_bsize);
-
- frsize = max (dict_buf->f_frsize,
- stbuf->f_frsize);
- unify_normalize_stats(dict_buf, bsize, frsize);
- unify_normalize_stats(stbuf, bsize, frsize);
- } else {
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- }
-
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = op_ret;
- } else {
- /* fop on storage node has failed due to some error */
- if (op_errno != ENOTCONN) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): %s",
- prev_frame->this->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs_buf);
- }
-
- return 0;
-}
-
-/**
- * unify_statfs -
- */
-int32_t
-unify_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- xlator_list_t *trav = this->children;
-
- INIT_LOCAL (frame, local);
- local->call_count = ((unify_private_t *)this->private)->child_count;
-
- while(trav) {
- STACK_WIND (frame,
- unify_statfs_cbk,
- trav->xlator,
- trav->xlator->fops->statfs,
- loc);
- trav = trav->next;
- }
-
- return 0;
-}
-
-/**
- * unify_buf_cbk -
- */
-int32_t
-unify_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = buf->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (buf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!IA_ISDIR (buf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-#define check_if_dht_linkfile(s) \
- ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) == S_ISVTX)
-
-/**
- * unify_lookup_cbk -
- */
-int32_t
-unify_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
- dict_t *local_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if (local->revalidate &&
- (op_errno == ESTALE)) {
- /* ESTALE takes priority */
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (local->op_errno != ESTALE)) {
- /* if local->op_errno is already ESTALE, then
- * ESTALE has to propogated to the parent first.
- * do not enter here.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
-
- } else if (local->revalidate &&
- (local->op_errno != ESTALE) &&
- !(priv->optimist && (op_errno == ENOENT))) {
-
- gf_log (this->name,
- (op_errno == ENOTCONN) ?
- GF_LOG_DEBUG:GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (check_if_dht_linkfile(buf)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "file %s may be DHT link file on %s, "
- "make sure the backend is not shared "
- "between unify and DHT",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- }
-
- if (local->stbuf.ia_type && local->stbuf.ia_blksize) {
- /* make sure we already have a stbuf
- stored in local->stbuf */
- if (IA_ISDIR (local->stbuf.ia_type) &&
- !IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on namespace, non-directory "
- "on node '%s', returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- if (!IA_ISDIR (local->stbuf.ia_type) &&
- IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on node '%s', non-directory "
- "on namespace, returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- }
-
- if (!local->revalidate && !IA_ISDIR (buf->ia_type)) {
- /* This is the first time lookup on file*/
- if (!local->list) {
- /* list is not allocated, allocate
- the max possible range */
- local->list = GF_CALLOC (1, 2 * (priv->child_count + 2),
- gf_unify_mt_int16_t);
- if (!local->list) {
- gf_log (this->name,
- GF_LOG_CRITICAL,
- "Not enough memory");
- STACK_UNWIND (frame, -1,
- ENOMEM, inode,
- NULL, NULL, NULL);
- return 0;
- }
- }
- /* update the index of the list */
- local->list [local->index++] =
- (int16_t)(long)cookie;
- }
-
- if (!local->revalidate && IA_ISDIR (buf->ia_type)) {
- /* fresh lookup of a directory */
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- if ((!local->dict) && dict &&
- (priv->xl_array[(long)cookie] != NS(this))) {
- local->dict = dict_ref (dict);
- }
-
- /* index of NS node is == total child count */
- if (priv->child_count == (int16_t)(long)cookie) {
- /* Take the inode number from namespace */
- local->ia_ino = buf->ia_ino;
- if (IA_ISDIR (buf->ia_type) ||
- !(local->stbuf.ia_blksize)) {
- local->stbuf = *buf;
- local->oldpostparent = *postparent;
- }
- } else if (!IA_ISDIR (buf->ia_type)) {
- /* If file, then get the stat from
- storage node */
- local->stbuf = *buf;
- }
-
- if (local->ia_nlink < buf->ia_nlink) {
- local->ia_nlink = buf->ia_nlink;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_dict = local->dict;
- if (local->return_eio) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] Unable to fix the path (%s) with "
- "self-heal, try manual verification. "
- "returning EIO.", local->loc1.path);
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL);
- if (local_dict) {
- dict_unref (local_dict);
- }
- return 0;
- }
-
- if (!local->stbuf.ia_blksize) {
- /* Inode not present */
- local->op_ret = -1;
- } else {
- if (!local->revalidate &&
- !IA_ISDIR (local->stbuf.ia_type)) {
- /* If its a file, big array is useless,
- allocate the smaller one */
- int16_t *list = NULL;
- list = GF_CALLOC (1, 2 * (local->index + 1),
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- memcpy (list, local->list, 2 * local->index);
- /* Make the end of the list as -1 */
- GF_FREE (local->list);
- local->list = list;
- local->list [local->index] = -1;
- /* Update the inode's ctx with proper array */
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
- }
-
- if (IA_ISDIR(local->loc1.inode->ia_type)) {
- /* lookup is done for directory */
- if (local->failed && priv->self_heal) {
- /* Triggering self-heal */
- /* means, self-heal required for this
- inode */
- local->inode_generation = 0;
- priv->inode_generation++;
- }
- } else {
- local->stbuf.ia_ino = local->ia_ino;
- }
-
- local->stbuf.ia_nlink = local->ia_nlink;
- }
- if (local->op_ret == -1) {
- if (!local->revalidate && local->list)
- GF_FREE (local->list);
- }
-
- if ((local->op_ret >= 0) && local->failed &&
- local->revalidate) {
- /* Done revalidate, but it failed */
- if ((op_errno != ENOTCONN)
- && (local->op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Revalidate failed for path(%s): %s",
- local->loc1.path, strerror (op_errno));
- }
- local->op_ret = -1;
- }
-
- if ((priv->self_heal && !priv->optimist) &&
- (!local->revalidate && (local->op_ret == 0) &&
- IA_ISDIR(local->stbuf.ia_type))) {
- /* Let the self heal be done here */
- zr_unify_self_heal (frame, this, local);
- local_dict = NULL;
- } else {
- if (local->failed) {
- /* NOTE: directory lookup is sent to all
- * subvolumes and success from a subvolume
- * might set local->op_ret to 0 (zero) */
- local->op_ret = -1;
- }
-
- /* either no self heal, or op_ret == -1 (failure) */
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- }
- if (local_dict) {
- dict_unref (local_dict);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_lookup -
- */
-int32_t
-unify_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- long index = 0;
-
- if (!(loc && loc->inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: Argument not right", loc?loc->path:"(null)");
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL, NULL);
- return 0;
- }
-
- if (inode_ctx_get (loc->inode, this, NULL)
- && IA_ISDIR (loc->inode->ia_type)) {
- local->revalidate = 1;
- }
-
- if (!inode_ctx_get (loc->inode, this, NULL) &&
- loc->inode->ia_type &&
- !IA_ISDIR (loc->inode->ia_type)) {
- uint64_t tmp_list = 0;
- /* check if revalidate or fresh lookup */
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
- }
-
- if (local->list) {
- list = local->list;
- for (index = 0; list[index] != -1; index++);
- if (index != 2) {
- if (index < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ESTALE for %s: file "
- "count is %ld", loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, ESTALE,
- NULL, NULL, NULL, NULL);
- return 0;
- } else {
- /* There are more than 2 presences */
- /* Just log and continue */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: file count is %ld",
- loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- }
- }
-
- /* is revalidate */
- local->revalidate = 1;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)(long)list[index], //cookie
- priv->xl_array [list[index]],
- priv->xl_array [list[index]]->fops->lookup,
- loc,
- xattr_req);
- if (need_break)
- break;
- }
- } else {
- if (loc->inode->ia_type) {
- if (inode_ctx_get (loc->inode, this, NULL)) {
- inode_ctx_get (loc->inode, this,
- &local->inode_generation);
- }
- }
- /* This is first call, there is no list */
- /* call count should be all child + 1 namespace */
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index <= priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- loc,
- xattr_req);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_stat - if directory, get the stat directly from NameSpace child.
- * if file, check for a hint and send it only there (also to NS).
- * if its a fresh stat, then do it on all the nodes.
- *
- * NOTE: for all the call, sending cookie as xlator pointer, which will be
- * used in cbk.
- */
-int32_t
-unify_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
- local->ia_ino = loc->inode->ino;
- if (IA_ISDIR (loc->inode->ia_type)) {
- /* Directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->stat, loc);
- } else {
- /* File */
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->stat,
- loc);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_access_cbk -
- */
-int32_t
-unify_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_access - Send request to only namespace, which has all the
- * attributes set for the file.
- */
-int32_t
-unify_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame,
- unify_access_cbk,
- NS(this),
- NS(this)->fops->access,
- loc,
- mask);
-
- return 0;
-}
-
-int32_t
-unify_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if ((op_ret == -1) && !(priv->optimist &&
- (op_errno == ENOENT ||
- op_errno == EEXIST))) {
- /* TODO: Decrement the inode_generation of
- * this->inode's parent inode, hence the missing
- * directory is created properly by self-heal.
- * Currently, there is no way to get the parent
- * inode directly.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- if (op_errno != EEXIST)
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = 0;
-
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->failed) {
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_mkdir_cbk -
- */
-int32_t
-unify_ns_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- long index = 0;
-
- if (op_ret == -1) {
- /* No need to send mkdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->name, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->call_count = priv->child_count;
-
- /* Send mkdir request to all the nodes now */
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_mkdir_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->mkdir,
- &local->loc1,
- local->mode);
- }
-
- return 0;
-}
-
-
-/**
- * unify_mkdir -
- */
-int32_t
-unify_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
-
- loc_copy (&local->loc1, loc);
-
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mkdir_cbk,
- NS(this),
- NS(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-/**
- * unify_rmdir_cbk -
- */
-int32_t
-unify_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT)))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_rmdir_cbk -
- */
-int32_t
-unify_ns_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* No need to send rmdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name,
- ((op_errno != ENOTEMPTY) ?
- GF_LOG_ERROR : GF_LOG_DEBUG),
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
- return 0;
- }
-
- local->call_count = priv->child_count;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rmdir_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rmdir,
- &local->loc1);
- }
-
- return 0;
-}
-
-/**
- * unify_rmdir -
- */
-int32_t
-unify_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_rmdir_cbk,
- NS(this),
- NS(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-/**
- * unify_open_cbk -
- */
-int32_t
-unify_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in
- all the f*** / FileIO calls */
- fd_ctx_set (fd, this, (uint64_t)(long)cookie);
- }
- }
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if ((local->failed == 1) && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- //local->op_errno = EIO;
-
- if (!fd_ctx_get (local->fd, this, NULL)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on child node, "
- "failed on namespace");
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on namespace, "
- "failed on child node");
- }
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
-
- return 0;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_open_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->index++;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->list[0] = (int16_t)(long)cookie;
- } else {
- local->list[1] = (int16_t)(long)cookie;
- }
- if (IA_ISDIR (buf->ia_type))
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- file_list[0] = local->list[0];
- file_list[1] = local->list[1];
- file_list[2] = -1;
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->name, local->index);
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning as file found on less "
- "than 2 nodes");
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- return 0;
- }
- }
-
- if (local->failed) {
- /* Open on directory, return EISDIR */
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EISDIR, local->fd);
- return 0;
- }
-
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, local->wbflags);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_open_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- STACK_UNWIND (frame, -1, ENOENT);
- return 0;
- }
-
- if (path[0] == '/') {
- local->name = gf_strdup (path);
- ERR_ABORT (local->name);
- } else {
- char *tmp_str = gf_strdup (local->loc1.path);
- char *tmp_base = dirname (tmp_str);
- local->name = GF_CALLOC (1, ZR_PATH_MAX, gf_unify_mt_char);
- strcpy (local->name, tmp_base);
- strncat (local->name, "/", 1);
- strcat (local->name, path);
- GF_FREE (tmp_str);
- }
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send the lookup to all the nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_open_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
-
- return 0;
-}
-#endif /* GF_DARWIN_HOST_OS */
-
-/**
- * unify_open -
- */
-int32_t
-unify_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- int32_t wbflags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t file_list[3] = {0,};
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Init */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->fd = fd;
- local->flags = flags;
- local->wbflags = wbflags;
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- local->list = list;
- file_list[0] = priv->child_count; /* Thats namespace */
- file_list[2] = -1;
- for (index = 0; list[index] != -1; index++) {
- local->call_count++;
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->call_count != 2) {
- /* If the lookup was done for file */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: entry_count is %d",
- loc->path, local->call_count);
- for (index = 0; local->list[index] != -1; index++)
- gf_log (this->name, GF_LOG_ERROR, "%s: found on %s",
- loc->path, priv->xl_array[list[index]]->name);
-
- if (local->call_count < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on onlyone node");
- STACK_UNWIND (frame, -1, EIO, fd);
- return 0;
- }
- }
-
-#ifdef GF_DARWIN_HOST_OS
- /* Handle symlink here */
- if (IA_ISLNK (loc->inode->ia_type)) {
- /* Callcount doesn't matter here */
- STACK_WIND (frame,
- unify_open_readlink_cbk,
- NS(this),
- NS(this)->fops->readlink,
- loc, ZR_PATH_MAX);
- return 0;
- }
-#endif /* GF_DARWIN_HOST_OS */
-
- local->call_count = 2;
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]], //cookie
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- loc,
- flags,
- fd, wbflags);
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-
-int32_t
-unify_create_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- inode_t *inode = local->loc1.inode;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_create_open_cbk -
- */
-int32_t
-unify_create_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int ret = 0;
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_value = 0;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in all
- the f*** / FileIO calls */
- /* TODO: log on failure */
- ret = fd_ctx_get (fd, this, &tmp_value);
- cookie = (void *)(long)tmp_value;
- } else {
- /* NOTE: open successful on namespace.
- * fd's ctx can be used to identify open
- * failure on storage subvolume. cool
- * ide ;) */
- local->failed = 0;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- ((xlator_t *)cookie)->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed == 1 && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- local->op_errno = EIO;
- local->fd = fd;
- local->call_count = 1;
-
- if (!fd_ctx_get (local->fd, this, &tmp_value)) {
- child = (xlator_t *)(long)tmp_value;
-
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on child node, "
- "failed on namespace");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- child,
- child->fops->unlink,
- &local->loc1);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on namespace, "
- "failed on child node");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- }
- return 0;
- }
- inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
- return 0;
-}
-
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_create_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->list[local->index++] = (int16_t)(long)cookie;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->ia_ino = buf->ia_ino;
- } else {
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t *list = local->list;
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- local->list [local->index] = -1;
- file_list[0] = list[0];
- file_list[1] = list[1];
- file_list[2] = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->loc1.path, local->index);
- file_list[0] = priv->child_count;
- for (index = 0; list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", local->loc1.path,
- priv->xl_array[list[index]]->name);
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on "
- "only one node");
- STACK_UNWIND (frame, -1, EIO,
- local->fd, inode, NULL,
- NULL, NULL);
- return 0;
- }
- }
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_create_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, 0);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_create_cbk -
- */
-int32_t
-unify_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- inode_t *tmp_inode = NULL;
-
- if (op_ret == -1) {
- /* send unlink () on Namespace */
- local->op_errno = op_errno;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "create failed on %s (file %s, error %s), "
- "sending unlink to namespace",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->stbuf = *buf;
- /* Just inode number should be from NS node */
- local->stbuf.ia_ino = local->ia_ino;
-
- /* TODO: log on failure */
- ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_create_cbk -
- *
- */
-int32_t
-unify_ns_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send create request to other servers, as
- namespace action failed. Handle exclusive create here. */
- if ((op_errno != EEXIST) ||
- ((op_errno == EEXIST) &&
- ((local->flags & O_EXCL) == O_EXCL))) {
- /* If its just a create call without O_EXCL,
- don't do this */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
- }
- }
-
- if (op_ret >= 0) {
- /* Get the inode number from the NS node */
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->op_ret = -1;
-
- /* Start the mapping list */
- list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- inode_ctx_put (inode, this, (uint64_t)(long)list);
- list[0] = priv->child_count;
- list[2] = -1;
-
- /* This means, file doesn't exist anywhere in the Filesystem */
- sched_ops = priv->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (sched_xl == NULL)
- {
- /* send unlink () on Namespace */
- local->op_errno = ENOTCONN;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "no node online to schedule create:(file %s) "
- "sending unlink to namespace",
- (local->loc1.path)?local->loc1.path:"");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_create_cbk,
- sched_xl, sched_xl->fops->create,
- &local->loc1, local->flags, local->mode, fd);
- } else {
- /* File already exists, and there is no O_EXCL flag */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "File(%s) already exists on namespace, sending "
- "open instead", local->loc1.path);
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send lookup() to all nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_create_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
- }
- return 0;
-}
-
-/**
- * unify_create - create a file in global namespace first, so other
- * clients can see them. Create the file in storage nodes in background.
- */
-int32_t
-unify_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->flags = flags;
- local->fd = fd;
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_create_cbk,
- NS(this),
- NS(this)->fops->create,
- loc,
- flags | O_EXCL,
- mode,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_opendir_cbk -
- */
-int32_t
-unify_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- return 0;
-}
-
-/**
- * unify_opendir -
- */
-int32_t
-unify_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame, unify_opendir_cbk,
- NS(this), NS(this)->fops->opendir, loc, fd);
-
- return 0;
-}
-
-
-int32_t
-unify_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = statpost->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (statpost->ia_type) ||
- !local->stpost.ia_blksize) {
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
-
- if ((!IA_ISDIR (statpost->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stpre.ia_ino = local->ia_ino;
- local->stpost.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stpre, &local->stpost);
- }
-
- return 0;
-}
-
-
-int32_t
-unify_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- if (!(loc && loc->inode)) {
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setattr_cbk,
- NS (this),
- NS (this)->fops->setattr,
- loc, stbuf, valid);
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_setattr_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->setattr,
- loc, stbuf, valid);
-
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_setattr_cbk, child,
- child->fops->fsetattr, fd, stbuf, valid);
-
- STACK_WIND (frame, unify_setattr_cbk, NS(this),
- NS(this)->fops->fsetattr, fd, stbuf, valid);
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, unify_setattr_cbk,
- NS(this), NS(this)->fops->fsetattr,
- fd, stbuf, valid);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate_cbk -
- */
-int32_t
-unify_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- local->op_errno = op_errno;
- if (!((op_errno == ENOENT) && priv->optimist))
- local->op_ret = -1;
- }
-
- if (op_ret >= 0) {
- if (NS (this) == prev_frame->this) {
- local->ia_ino = postbuf->ia_ino;
- /* If the entry is directory, get the
- stat from NS node */
- if (IA_ISDIR (postbuf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
-
- if ((!IA_ISDIR (postbuf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from
- Storage node. */
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->ia_ino) {
- local->stbuf.ia_ino = local->ia_ino;
- local->poststbuf.ia_ino = local->ia_ino;
- } else {
- local->op_ret = -1;
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->poststbuf);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate -
- */
-int32_t
-unify_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->ia_ino = loc->inode->ino;
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_truncate_cbk,
- NS(this),
- NS(this)->fops->truncate,
- loc,
- 0);
- } else {
- local->op_ret = 0;
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- /* Don't send offset to NS truncate */
- STACK_WIND (frame, unify_truncate_cbk, NS(this),
- NS(this)->fops->truncate, loc, 0);
- callcnt--;
-
- for (index = 0; local->list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[local->list[index]]) {
- STACK_WIND (frame,
- unify_truncate_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->truncate,
- loc,
- offset);
- if (!--callcnt)
- break;
- }
- }
- }
-
- return 0;
-}
-
-/**
- * unify_readlink_cbk -
- */
-int32_t
-unify_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
- return 0;
-}
-
-/**
- * unify_readlink - Read the link only from the storage node.
- */
-int32_t
-unify_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- unify_private_t *priv = this->private;
- int32_t entry_count = 0;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- entry_count++;
-
- if (entry_count >= 2) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_readlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->readlink,
- loc,
- size);
- break;
- }
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ENOENT, no softlink files found "
- "on storage node");
- STACK_UNWIND (frame, -1, ENOENT, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink_cbk -
- */
-int32_t
-unify_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- if (((call_frame_t *)cookie)->this == NS(this)) {
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink -
- */
-int32_t
-unify_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_unlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->unlink,
- loc);
- if (need_break)
- break;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: returning ENOENT", loc->path);
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_readv_cbk -
- */
-int32_t
-unify_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-/**
- * unify_readv -
- */
-int32_t
-unify_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_readv_cbk,
- child,
- child->fops->readv,
- fd,
- size,
- offset);
-
-
- return 0;
-}
-
-/**
- * unify_writev_cbk -
- */
-int32_t
-unify_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- unify_local_t *local = NULL;
-
- local = frame->local;
-
- local->stbuf = *prebuf;
- local->stbuf.ia_ino = local->ia_ino;
-
- local->poststbuf = *postbuf;
- local->poststbuf.ia_ino = local->ia_ino;
-
- STACK_UNWIND (frame, op_ret, op_errno,
- &local->stbuf, &local->poststbuf);
- return 0;
-}
-
-/**
- * unify_writev -
- */
-int32_t
-unify_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
- unify_local_t *local = NULL;
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_writev_cbk,
- child,
- child->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
-
- return 0;
-}
-
-/**
- * unify_ftruncate -
- */
-int32_t
-unify_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- xlator_t *child = NULL;
- unify_local_t *local = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->op_ret = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_truncate_cbk,
- child, child->fops->ftruncate,
- fd, offset);
-
- STACK_WIND (frame, unify_truncate_cbk,
- NS(this), NS(this)->fops->ftruncate,
- fd, 0);
-
- return 0;
-}
-
-
-/**
- * unify_flush_cbk -
- */
-int32_t
-unify_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_flush -
- */
-int32_t
-unify_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_flush_cbk, child,
- child->fops->flush, fd);
-
- return 0;
-}
-
-
-/**
- * unify_fsync_cbk -
- */
-int32_t
-unify_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-/**
- * unify_fsync -
- */
-int32_t
-unify_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fsync_cbk, child,
- child->fops->fsync, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_fstat - Send fstat FOP to Namespace only if its directory, and to
- * both namespace and the storage node if its a file.
- */
-int32_t
-unify_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fstat, fd);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
- }
-
- return 0;
-}
-
-/**
- * unify_getdents_cbk -
- */
-int32_t
-unify_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- return 0;
-}
-
-/**
- * unify_getdents - send the FOP request to all the nodes.
- */
-int32_t
-unify_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_getdents_cbk, NS(this),
- NS(this)->fops->getdents, fd, size, offset, flag);
-
- return 0;
-}
-
-
-/**
- * unify_readdir_cbk -
- */
-int32_t
-unify_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-/**
- * unify_readdir - send the FOP request to all the nodes.
- */
-int32_t
-unify_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdir_cbk, NS(this),
- NS(this)->fops->readdir, fd, size, offset);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdirp_cbk, NS(this),
- NS(this)->fops->readdirp, fd, size, offset);
-
- return 0;
-}
-
-
-/**
- * unify_fsyncdir_cbk -
- */
-int32_t
-unify_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/**
- * unify_fsyncdir -
- */
-int32_t
-unify_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_fsyncdir_cbk,
- NS(this), NS(this)->fops->fsyncdir, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_lk_cbk - UNWIND frame with the proper return arguments.
- */
-int32_t
-unify_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-/**
- * unify_lk - Send it to all the storage nodes, (should be 1) which has file.
- */
-int32_t
-unify_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_lk_cbk, child,
- child->fops->lk, fd, cmd, lock);
-
- return 0;
-}
-
-
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-static int32_t
-unify_setxattr_file_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_private_t *private = this->private;
- unify_local_t *local = frame->local;
- xlator_t *sched_xl = NULL;
- struct sched_ops *sched_ops = NULL;
-
- if (op_ret == -1) {
- if (!ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr with XATTR_CREATE on ns: "
- "path(%s) key(%s): %s",
- local->loc1.path, local->name,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- LOCK (&frame->lock);
- {
- local->failed = 0;
- local->op_ret = 0;
- local->op_errno = 0;
- local->call_count = 1;
- }
- UNLOCK (&frame->lock);
-
- /* schedule XATTR_CREATE on one of the child node */
- sched_ops = private->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->name);
- if (!sched_xl) {
- STACK_UNWIND (frame, -1, ENOTCONN);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_setxattr_cbk,
- sched_xl,
- sched_xl->fops->setxattr,
- &local->loc1,
- local->dict,
- local->flags);
- return 0;
-}
-
-/**
- * unify_setxattr_cbk - When all the child nodes return, UNWIND frame.
- */
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- dict_t *dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, (((op_errno == ENOENT) ||
- (op_errno == ENOTSUP))?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- if (local->failed == -1) {
- local->failed = 1;
- }
- local->op_errno = op_errno;
- } else {
- local->failed = 0;
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed && local->name &&
- ZR_FILE_CONTENT_REQUEST(local->name)) {
- dict = get_new_dict ();
- dict_set (dict, local->dict->members_list->key,
- data_from_dynptr(NULL, 0));
- dict_ref (dict);
-
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setxattr_file_cbk,
- NS(this),
- NS(this)->fops->setxattr,
- &local->loc1,
- dict,
- XATTR_CREATE);
-
- dict_unref (dict);
- return 0;
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_sexattr - This function should be sent to all the storage nodes,
- * which contains the file, (excluding namespace).
- */
-int32_t
-unify_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
- data_pair_t *trav = dict->members_list;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->failed = -1;
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
-
- if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
- /* direct the storage xlators to change file
- content only if file exists */
- local->flags = flags;
- local->dict = dict;
- local->name = gf_strdup (trav->key);
- flags |= XATTR_REPLACE;
- }
-
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setxattr,
- loc, dict, flags);
- }
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->setxattr,
- loc,
- dict,
- flags);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- /* No entry in storage nodes */
- gf_log (this->name, GF_LOG_DEBUG,
- "returning ENOENT, file not found on storage node.");
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-/**
- * unify_getxattr_cbk - This function is called from only one child, so, no
- * need of any lock or anything else, just send it to above layer
- */
-int32_t
-unify_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value)
-{
- int32_t callcnt = 0;
- dict_t *local_value = NULL;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name,
- (((op_errno == ENOENT) ||
- (op_errno == ENODATA) ||
- (op_errno == ENOTSUP)) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- } else {
- if (!local->dict)
- local->dict = dict_ref (value);
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_value = local->dict;
- local->dict = NULL;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local_value);
-
- if (local_value)
- dict_unref (local_value);
- }
-
- return 0;
-}
-
-
-/**
- * unify_getxattr - This FOP is sent to only the storage node.
- */
-int32_t
-unify_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t count = 0;
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getxattr,
- loc,
- name);
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- count++;
- }
- }
-
- if (count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->getxattr,
- loc,
- name);
- if (!--count)
- break;
- }
- }
- } else {
- dict_t *tmp_dict = get_new_dict ();
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENODATA, no file found on storage node",
- loc->path);
- STACK_UNWIND (frame, -1, ENODATA, tmp_dict);
- dict_destroy (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr_cbk - Wait till all the child node returns the call
- * and then UNWIND to above layer.
- */
-int32_t
-unify_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- if (op_errno != ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
- } else {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr - Send it to all the child nodes which has the files.
- */
-int32_t
-unify_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->removexattr,
- loc,
- name);
-
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->removexattr,
- loc,
- name);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENOENT, not found on storage node.", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-int32_t
-unify_mknod_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- /* No log required here as this -1 is for mknod call */
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_mknod_cbk -
- */
-int32_t
-unify_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, sending unlink to "
- "namespace");
- local->op_errno = op_errno;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- return 0;
-}
-
-/**
- * unify_ns_mknod_cbk -
- */
-int32_t
-unify_ns_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- /* No need to send mknod request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name, local->loc1.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count;
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send mknod request to scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, no node online "
- "at the moment, sending unlink to NS");
- local->op_errno = ENOTCONN;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_mknod_cbk,
- sched_xl, sched_xl->fops->mknod,
- &local->loc1, local->mode, local->dev);
-
- return 0;
-}
-
-/**
- * unify_mknod - Create a device on namespace first, and later create on
- * the storage node.
- */
-int32_t
-unify_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->dev = rdev;
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mknod_cbk,
- NS(this),
- NS(this)->fops->mknod,
- loc,
- mode,
- rdev);
-
- return 0;
-}
-
-int32_t
-unify_symlink_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_symlink_cbk -
- */
-int32_t
-unify_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, sending unlink "
- "to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_symlink_cbk -
- */
-int32_t
-unify_ns_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
-
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- int16_t *list = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send symlink request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Start the mapping list */
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count; //namespace's index
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send symlink request to all the nodes now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, no node online, "
- "sending unlink to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame,
- unify_symlink_cbk,
- sched_xl,
- sched_xl->fops->symlink,
- local->name,
- &local->loc1);
-
- return 0;
-}
-
-/**
- * unify_symlink -
- */
-int32_t
-unify_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->name = gf_strdup (linkpath);
-
- if ((local->name == NULL) ||
- (local->loc1.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_symlink_cbk,
- NS(this),
- NS(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-
-int32_t
-unify_rename_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- }
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
-}
-
-int32_t
-unify_ns_rename_undo_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- }
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
-}
-
-int32_t
-unify_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (!IA_ISDIR (buf->ia_type))
- local->stbuf = *buf;
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- if (IA_ISDIR (local->loc1.inode->ia_type)) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->oldpreparent,
- &local->oldpostparent, &local->newpreparent,
- &local->newpostparent);
- return 0;
- }
-
- if (local->op_ret == -1) {
- /* TODO: check this logic */
-
- /* Rename failed in storage node, successful on NS,
- * hence, rename back the entries in NS */
- /* NOTE: this will be done only if the destination
- * doesn't exists, if the destination exists, the
- * job of correcting NS is left to self-heal
- */
- if (!local->index) {
- loc_t tmp_oldloc = {
- /* its actual 'newloc->path' */
- .path = local->loc2.path,
- .inode = local->loc1.inode,
- .parent = local->loc2.parent
- };
-
- loc_t tmp_newloc = {
- /* Actual 'oldloc->path' */
- .path = local->loc1.path,
- .parent = local->loc1.parent
- };
-
- gf_log (this->name, GF_LOG_ERROR,
- "rename succussful on namespace, on "
- "stroage node failed, reverting back");
-
- STACK_WIND (frame,
- unify_ns_rename_undo_cbk,
- NS(this),
- NS(this)->fops->rename,
- &tmp_oldloc,
- &tmp_newloc);
- return 0;
- }
- } else {
- /* Rename successful on storage nodes */
-
- int32_t idx = 0;
- int16_t *tmp_list = NULL;
- uint64_t tmp_list_int64 = 0;
- if (local->loc2.inode) {
- inode_ctx_get (local->loc2.inode,
- this, &tmp_list_int64);
- list = (int16_t *)(long)tmp_list_int64;
-
- }
-
- if (list) {
- for (index = 0; list[index] != -1; index++);
- tmp_list = GF_CALLOC (1, index * 2,
- gf_unify_mt_int16_t);
- memcpy (tmp_list, list, index * 2);
-
- for (index = 0; list[index] != -1; index++) {
- /* TODO: Check this logic. */
- /* If the destination file exists in
- * the same storage node where we sent
- * 'rename' call, no need to send
- * unlink
- */
- for (idx = 0;
- local->list[idx] != -1; idx++) {
- if (tmp_list[index] == local->list[idx]) {
- tmp_list[index] = priv->child_count;
- continue;
- }
- }
-
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- if (callcnt > 1)
- gf_log (this->name,
- GF_LOG_ERROR,
- "%s->%s: more (%d) "
- "subvolumes have the "
- "newloc entry",
- local->loc1.path,
- local->loc2.path,
- callcnt);
-
- for (index=0;
- tmp_list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- STACK_WIND (frame,
- unify_rename_unlink_cbk,
- priv->xl_array[tmp_list[index]],
- priv->xl_array[tmp_list[index]]->fops->unlink,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
-
- GF_FREE (tmp_list);
- return 0;
- }
- if (tmp_list)
- GF_FREE (tmp_list);
- }
- }
-
- /* Need not send 'unlink' to storage node */
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent,
- &local->newpreparent, &local->newpostparent);
- }
-
- return 0;
-}
-
-int32_t
-unify_ns_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Free local->new_inode */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
- }
-
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preoldparent;
- local->oldpostparent = *postoldparent;
- local->newpreparent = *prenewparent;
- local->newpostparent = *postnewparent;
-
- /* Everything is fine. */
- if (IA_ISDIR (buf->ia_type)) {
- local->call_count = priv->child_count;
- for (index=0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rename,
- &local->loc1,
- &local->loc2);
- }
-
- return 0;
- }
-
- local->call_count = 0;
- /* send rename */
- list = local->list;
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->rename,
- &local->loc1,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
- } else {
- /* file doesn't seem to be present in storage nodes */
- gf_log (this->name, GF_LOG_CRITICAL,
- "CRITICAL: source file not in storage node, "
- "rename successful on namespace :O");
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- }
- return 0;
-}
-
-
-/**
- * unify_rename - One of the tricky function. The deadliest of all :O
- */
-int32_t
-unify_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- if ((local->loc1.path == NULL) ||
- (local->loc2.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- return 0;
- }
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_rename_cbk,
- NS(this),
- NS(this)->fops->rename,
- oldloc,
- newloc);
- return 0;
-}
-
-/**
- * unify_link_cbk -
- */
-int32_t
-unify_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret >= 0)
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_link_cbk -
- */
-int32_t
-unify_ns_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- int16_t *list = local->list;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send link request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Update inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Send link request to the node now */
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- if (priv->xl_array[list[index]] != NS (this)) {
- STACK_WIND (frame,
- unify_link_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->link,
- &local->loc1,
- &local->loc2);
- break;
- }
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-/**
- * unify_link -
- */
-int32_t
-unify_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_link_cbk,
- NS(this),
- NS(this)->fops->link,
- oldloc,
- newloc);
-
- return 0;
-}
-
-
-/**
- * unify_checksum_cbk -
- */
-int32_t
-unify_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
-
- return 0;
-}
-
-/**
- * unify_checksum -
- */
-int32_t
-unify_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- STACK_WIND (frame,
- unify_checksum_cbk,
- NS(this),
- NS(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-/**
- * unify_finodelk_cbk -
- */
-int
-unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_finodelk
- */
-int
-unify_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int cmd, struct gf_flock *flock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_finodelk_cbk,
- child, child->fops->finodelk,
- volume, fd, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_fentrylk_cbk -
- */
-int
-unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fentrylk
- */
-int
-unify_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fentrylk_cbk,
- child, child->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_fxattrop_cbk -
- */
-int
-unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_fxattrop
- */
-int
-unify_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fxattrop_cbk,
- child, child->fops->fxattrop,
- fd, optype, xattr);
-
- return 0;
-}
-
-
-/**
- * unify_inodelk_cbk -
- */
-int
-unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_inodelk
- */
-int
-unify_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int cmd, struct gf_flock *flock)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_inodelk_cbk,
- child, child->fops->inodelk,
- volume, loc, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_entrylk_cbk -
- */
-int
-unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_entrylk
- */
-int
-unify_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_entrylk_cbk,
- child, child->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_xattrop_cbk -
- */
-int
-unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_xattrop
- */
-int
-unify_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_xattrop_cbk,
- child, child->fops->xattrop,
- loc, optype, xattr);
-
- return 0;
-}
-
-int
-unify_forget (xlator_t *this,
- inode_t *inode)
-{
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- if (inode->ia_type && (!IA_ISDIR(inode->ia_type))) {
- inode_ctx_get (inode, this, &tmp_list);
- if (tmp_list) {
- list = (int16_t *)(long)tmp_list;
- GF_FREE (list);
- }
- }
-
- return 0;
-}
-
-/**
- * notify
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- unify_private_t *priv = this->private;
- struct sched_ops *sched = NULL;
-
- if (!priv) {
- return 0;
- }
-
- sched = priv->sched_ops;
- if (!sched) {
- gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O");
- raise (SIGTERM);
- return 0;
- }
- if (priv->namespace == data) {
- if (event == GF_EVENT_CHILD_UP) {
- sched->notify (this, event, data);
- }
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- /* Call scheduler's update () to enable it for scheduling */
- sched->notify (this, event, data);
-
- LOCK (&priv->lock);
- {
- /* Increment the inode's generation, which is
- used for self_heal */
- ++priv->inode_generation;
- ++priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (!priv->is_up) {
- default_notify (this, event, data);
- priv->is_up = 1;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- /* Call scheduler's update () to disable the child node
- * for scheduling
- */
- sched->notify (this, event, data);
- LOCK (&priv->lock);
- {
- --priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (priv->num_child_up == 0) {
- /* Send CHILD_DOWN to upper layer */
- default_notify (this, event, data);
- priv->is_up = 0;
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- break;
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_unify_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-/**
- * init - This function is called first in the xlator, while initializing.
- * All the config file options are checked and appropriate flags are set.
- *
- * @this -
- */
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = 0;
- int32_t count = 0;
- data_t *scheduler = NULL;
- data_t *data = NULL;
- xlator_t *ns_xl = NULL;
- xlator_list_t *trav = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t *parent = NULL;
- unify_private_t *_private = NULL;
-
-
- /* Check for number of child nodes, if there is no child nodes, exit */
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "No child nodes specified. check \"subvolumes \" "
- "option in volfile");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- /* Check for 'scheduler' in volume */
- scheduler = dict_get (this->options, "scheduler");
- if (!scheduler) {
- gf_log (this->name, GF_LOG_ERROR,
- "\"option scheduler <x>\" is missing in volfile");
- return -1;
- }
-
- /* Setting "option namespace <node>" */
- data = dict_get (this->options, "namespace");
- if(!data) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace option not specified, Exiting");
- return -1;
- }
- /* Search namespace in the child node, if found, exit */
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, data->data) == 0)
- break;
- trav = trav->next;
- }
- if (trav) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node used as a subvolume, Exiting");
- return -1;
- }
-
- /* Search for the namespace node, if found, continue */
- ns_xl = this->next;
- while (ns_xl) {
- if (strcmp (ns_xl->name, data->data) == 0)
- break;
- ns_xl = ns_xl->next;
- }
- if (!ns_xl) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node not found in volfile, Exiting");
- return -1;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "namespace node specified as %s", data->data);
-
- _private = GF_CALLOC (1, sizeof (*_private),
- gf_unify_mt_unify_private_t);
- ERR_ABORT (_private);
- _private->sched_ops = get_scheduler (this, scheduler->data);
- if (!_private->sched_ops) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Error while loading scheduler. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- if (ns_xl->parents) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Namespace node should not be a child of any other node. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- _private->namespace = ns_xl;
-
- /* update _private structure */
- {
- count = 0;
- trav = this->children;
- /* Get the number of child count */
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Child node count is %d", count);
-
- _private->child_count = count;
- if (count == 1) {
- /* TODO: Should I error out here? */
- gf_log (this->name, GF_LOG_CRITICAL,
- "WARNING: You have defined only one "
- "\"subvolumes\" for unify volume. It may not "
- "be the desired config, review your volume "
- "volfile. If this is how you are testing it,"
- " you may hit some performance penalty");
- }
-
- _private->xl_array = GF_CALLOC (1,
- sizeof (xlator_t) * (count + 1),
- gf_unify_mt_xlator_t);
- ERR_ABORT (_private->xl_array);
-
- count = 0;
- trav = this->children;
- while (trav) {
- _private->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
- _private->xl_array[count] = _private->namespace;
-
- /* self-heal part, start with generation '1' */
- _private->inode_generation = 1;
- /* Because, Foreground part is tested well */
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
- data = dict_get (this->options, "self-heal");
- if (data) {
- if (strcasecmp (data->data, "off") == 0)
- _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF;
-
- if (strcasecmp (data->data, "foreground") == 0)
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
-
- if (strcasecmp (data->data, "background") == 0)
- _private->self_heal = ZR_UNIFY_BG_SELF_HEAL;
- }
-
- /* optimist - ask bulde for more about it */
- data = dict_get (this->options, "optimist");
- if (data) {
- if (gf_string2boolean (data->data,
- &_private->optimist) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "optimist excepts only boolean "
- "options");
- }
- }
-
- LOCK_INIT (&_private->lock);
- }
-
- /* Now that everything is fine. */
- this->private = (void *)_private;
- {
- ret = _private->sched_ops->mem_acct_init (this);
-
- if (ret == -1) {
- return -1;
- }
-
- /* Initialize scheduler, if everything else is successful */
- ret = _private->sched_ops->init (this);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Initializing scheduler failed, Exiting");
- GF_FREE (_private);
- return -1;
- }
-
-
- ret = 0;
-
- /* This section is required because some fops may look
- * for 'xl->parent' variable
- */
- xlparent = GF_CALLOC (1, sizeof (*xlparent),
- gf_unify_mt_xlator_list_t);
- xlparent->xlator = this;
- if (!ns_xl->parents) {
- ns_xl->parents = xlparent;
- } else {
- parent = ns_xl->parents;
- while (parent->next)
- parent = parent->next;
- parent->next = xlparent;
- }
- }
-
- /* Tell namespace node that init is done */
- xlator_notify (ns_xl, GF_EVENT_PARENT_UP, this);
-
- return 0;
-}
-
-/**
- * fini - Free all the allocated memory
- */
-void
-fini (xlator_t *this)
-{
- unify_private_t *priv = this->private;
- priv->sched_ops->fini (this);
- this->private = NULL;
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv->xl_array);
- GF_FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .stat = unify_stat,
- .readlink = unify_readlink,
- .mknod = unify_mknod,
- .mkdir = unify_mkdir,
- .unlink = unify_unlink,
- .rmdir = unify_rmdir,
- .symlink = unify_symlink,
- .rename = unify_rename,
- .link = unify_link,
- .truncate = unify_truncate,
- .create = unify_create,
- .open = unify_open,
- .readv = unify_readv,
- .writev = unify_writev,
- .statfs = unify_statfs,
- .flush = unify_flush,
- .fsync = unify_fsync,
- .setxattr = unify_setxattr,
- .getxattr = unify_getxattr,
- .removexattr = unify_removexattr,
- .opendir = unify_opendir,
- .readdir = unify_readdir,
- .readdirp = unify_readdirp,
- .fsyncdir = unify_fsyncdir,
- .access = unify_access,
- .ftruncate = unify_ftruncate,
- .fstat = unify_fstat,
- .lk = unify_lk,
- .lookup = unify_lookup,
- .getdents = unify_getdents,
- .checksum = unify_checksum,
- .inodelk = unify_inodelk,
- .finodelk = unify_finodelk,
- .entrylk = unify_entrylk,
- .fentrylk = unify_fentrylk,
- .xattrop = unify_xattrop,
- .fxattrop = unify_fxattrop,
- .setattr = unify_setattr,
- .fsetattr = unify_fsetattr,
-};
-
-
-struct xlator_cbks cbks = {
- .forget = unify_forget,
-};
-
-struct volume_options options[] = {
- { .key = { "namespace" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler" },
- .value = { "alu", "rr", "random", "nufa", "switch" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"self-heal"},
- .value = { "foreground", "background", "off" },
- .type = GF_OPTION_TYPE_STR
- },
- /* TODO: remove it some time later */
- { .key = {"optimist"},
- .type = GF_OPTION_TYPE_BOOL
- },
-
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h
deleted file mode 100644
index dbd5e44a2..000000000
--- a/xlators/cluster/unify/src/unify.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _UNIFY_H
-#define _UNIFY_H
-
-#include "scheduler.h"
-#include "list.h"
-#include "unify-mem-types.h"
-
-#define MAX_DIR_ENTRY_STRING (32 * 1024)
-
-#define ZR_UNIFY_SELF_HEAL_OFF 0
-#define ZR_UNIFY_FG_SELF_HEAL 1
-#define ZR_UNIFY_BG_SELF_HEAL 2
-
-/* Sometimes one should use completely random numbers.. its good :p */
-#define UNIFY_SELF_HEAL_GETDENTS_COUNT 512
-
-#define NS(xl) (((unify_private_t *)xl->private)->namespace)
-
-/* This is used to allocate memory for local structure */
-#define INIT_LOCAL(fr, loc) \
-do { \
- loc = GF_CALLOC (1, sizeof (unify_local_t), gf_unify_mt_unify_local_t); \
- ERR_ABORT (loc); \
- if (!loc) { \
- STACK_UNWIND (fr, -1, ENOMEM); \
- return 0; \
- } \
- fr->local = loc; \
- loc->op_ret = -1; \
- loc->op_errno = ENOENT; \
-} while (0)
-
-
-
-struct unify_private {
- /* Update this structure depending on requirement */
- void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE,
- if xlator is using scheduler */
- struct sched_ops *sched_ops; /* Scheduler options */
- xlator_t *namespace; /* ptr to namespace xlator */
- xlator_t **xl_array;
- gf_boolean_t optimist;
- int16_t child_count;
- int16_t num_child_up;
- uint8_t self_heal;
- uint8_t is_up;
- uint64_t inode_generation;
- gf_lock_t lock;
-};
-typedef struct unify_private unify_private_t;
-
-struct unify_self_heal_struct {
- uint8_t dir_checksum[NAME_MAX];
- uint8_t ns_dir_checksum[NAME_MAX];
- uint8_t file_checksum[NAME_MAX];
- uint8_t ns_file_checksum[NAME_MAX];
- off_t *offset_list;
- int *count_list;
- dir_entry_t **entry_list;
-};
-
-
-struct _unify_local_t {
- int32_t call_count;
- int32_t op_ret;
- int32_t op_errno;
- mode_t mode;
- off_t offset;
- dev_t dev;
- uid_t uid;
- gid_t gid;
- int32_t flags;
- int32_t entry_count;
- int32_t count; // dir_entry_t count;
- fd_t *fd;
- struct iatt stbuf;
- struct iatt stpre;
- struct iatt stpost;
- struct statvfs statvfs_buf;
- struct timespec tv[2];
- char *name;
- int32_t revalidate;
-
- ino_t ia_ino;
- nlink_t ia_nlink;
-
- dict_t *dict;
-
- int16_t *list;
- int16_t *new_list; /* Used only in case of rename */
- int16_t index;
-
- int32_t failed;
- int32_t return_eio; /* Used in case of different st-mode
- present for a given path */
-
- uint64_t inode_generation; /* used to store the per directory
- * inode_generation. Got from inode's ctx
- * of directory inodes
- */
-
- struct unify_self_heal_struct *sh_struct;
- loc_t loc1, loc2;
-
- struct iatt poststbuf;
- /* When not used for rename, old*
- * are used as the attrs for the current
- * parent directory.
- */
- struct iatt oldpreparent;
- struct iatt oldpostparent;
- struct iatt newpreparent;
- struct iatt newpostparent;
- int32_t wbflags;
-};
-typedef struct _unify_local_t unify_local_t;
-
-int32_t zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local);
-
-#endif /* _UNIFY_H */
diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am
index df9080358..5075c59a8 100644
--- a/xlators/debug/error-gen/src/Makefile.am
+++ b/xlators/debug/error-gen/src/Makefile.am
@@ -2,15 +2,16 @@
xlator_LTLIBRARIES = error-gen.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-error_gen_la_LDFLAGS = -module -avoidversion
+error_gen_la_LDFLAGS = -module -avoid-version
error_gen_la_SOURCES = error-gen.c
error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = error-gen.h error-gen-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
index b643dc5f7..f02280535 100644
--- a/xlators/debug/error-gen/src/error-gen-mem-types.h
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ERROR_GEN_MEM_TYPES_H__
#define __ERROR_GEN_MEM_TYPES_H__
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index 909dc681e..ec0874b35 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -24,6 +14,7 @@
#include "xlator.h"
#include "error-gen.h"
+#include "statedump.h"
sys_error_t error_no_list[] = {
[GF_FOP_LOOKUP] = { .error_no_count = 4,
@@ -91,9 +82,10 @@ sys_error_t error_no_list[] = {
[GF_FOP_READ] = { .error_no_count = 5,
.error_no = {EINVAL,EBADF,EFAULT,EISDIR,
ENAMETOOLONG}},
- [GF_FOP_WRITE] = { .error_no_count = 5,
+ [GF_FOP_WRITE] = { .error_no_count = 7,
.error_no = {EINVAL,EBADF,EFAULT,EISDIR,
- ENAMETOOLONG}},
+ ENAMETOOLONG,ENOSPC,
+ GF_ERROR_SHORT_WRITE}},
[GF_FOP_STATFS] = {.error_no_count = 10,
.error_no = {EACCES,EBADF,EFAULT,EINTR,
EIO,ENAMETOOLONG,ENOENT,
@@ -113,6 +105,15 @@ sys_error_t error_no_list[] = {
[GF_FOP_REMOVEXATTR] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,ENAMETOOLONG,
EINTR}},
+ [GF_FOP_FSETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_FGETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_FREMOVEXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
[GF_FOP_OPENDIR] = { .error_no_count = 8,
.error_no = {EACCES,EEXIST,EFAULT,
EISDIR,EMFILE,
@@ -237,6 +238,8 @@ conv_errno_to_int (char **error_no)
return EINTR;
else if (!strcmp ((*error_no), "EFBIG"))
return EFBIG;
+ else if (!strcmp((*error_no), "GF_ERROR_SHORT_WRITE"))
+ return GF_ERROR_SHORT_WRITE;
else
return EAGAIN;
}
@@ -286,6 +289,12 @@ get_fop_int (char **op_no_str)
return GF_FOP_GETXATTR;
else if (!strcmp ((*op_no_str), "removexattr"))
return GF_FOP_REMOVEXATTR;
+ else if (!strcmp ((*op_no_str), "fsetxattr"))
+ return GF_FOP_FSETXATTR;
+ else if (!strcmp ((*op_no_str), "fgetxattr"))
+ return GF_FOP_FGETXATTR;
+ else if (!strcmp ((*op_no_str), "fremovexattr"))
+ return GF_FOP_FREMOVEXATTR;
else if (!strcmp ((*op_no_str), "opendir"))
return GF_FOP_OPENDIR;
else if (!strcmp ((*op_no_str), "readdir"))
@@ -372,17 +381,17 @@ error_gen (xlator_t *this, int op_no)
int
error_gen_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode,
- buf, dict, postparent);
- return 0;
+ buf, xdata, postparent);
+ return 0;
}
int
error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -397,36 +406,28 @@ error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ NULL);
+ return 0;
}
STACK_WIND (frame, error_gen_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
- return 0;
-}
-
-
-int
-error_gen_forget (xlator_t *this, inode_t *inode)
-{
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
-
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -440,32 +441,31 @@ error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
-
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
int
error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -479,21 +479,21 @@ error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
- return 0;
+ loc, stbuf, valid, xdata);
+ return 0;
}
int
error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -507,32 +507,32 @@ error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
- return 0;
+ fd, stbuf, valid, xdata);
+ return 0;
}
int
error_gen_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -547,32 +547,32 @@ error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (truncate, frame, -1, op_errno,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
- return 0;
+ loc, offset, xdata);
+ return 0;
}
int
error_gen_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp =NULL;
@@ -587,31 +587,30 @@ error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
+ fd, offset, xdata);
+ return 0;
}
int
error_gen_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask)
+ int32_t mask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -625,31 +624,31 @@ error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (access, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
- return 0;
+ loc, mask, xdata);
+ return 0;
}
int
error_gen_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *path, struct iatt *sbuf)
+ const char *path, struct iatt *sbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf, xdata);
+ return 0;
}
int
error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
+ size_t size, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -663,15 +662,15 @@ error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
- return 0;
+ loc, size, xdata);
+ return 0;
}
@@ -679,18 +678,18 @@ int
error_gen_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, dict_t *params)
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -705,15 +704,15 @@ error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, params);
- return 0;
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
@@ -721,17 +720,17 @@ int
error_gen_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -746,31 +745,32 @@ error_gen_mkdir (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
- return 0;
+ loc, mode, umask, xdata);
+ return 0;
}
int
error_gen_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
-error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -784,31 +784,34 @@ error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL,
+ xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ loc, xflag, xdata);
+ return 0;
}
int
error_gen_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
-error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -822,15 +825,15 @@ error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
- return 0;
+ loc, flags, xdata);
+ return 0;
}
@@ -838,17 +841,17 @@ int
error_gen_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -863,15 +866,15 @@ error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL,
- NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL); /* pre & post parent attr */
return 0;
}
STACK_WIND (frame, error_gen_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
- return 0;
+ linkpath, loc, umask, xdata);
+ return 0;
}
@@ -879,18 +882,19 @@ int
error_gen_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
+ prenewparent, postnewparent, xdata);
+ return 0;
}
int
error_gen_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -905,15 +909,15 @@ error_gen_rename (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
- NULL, NULL, NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
@@ -921,17 +925,17 @@ int
error_gen_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -946,15 +950,15 @@ error_gen_link (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
@@ -962,17 +966,18 @@ int
error_gen_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -987,30 +992,30 @@ error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL); /* pre & post attr */
+ NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
- return 0;
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
}
int
error_gen_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int
error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1024,15 +1029,15 @@ error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
- return 0;
+ loc, flags, fd, xdata);
+ return 0;
}
@@ -1040,17 +1045,17 @@ int
error_gen_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref);
- return 0;
+ vector, count, stbuf, iobref, xdata);
+ return 0;
}
int
error_gen_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1065,33 +1070,33 @@ error_gen_readv (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- return 0;
+ fd, size, offset, flags, xdata);
+ return 0;
}
int
error_gen_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t off, struct iobref *iobref)
+ off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1103,31 +1108,47 @@ error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (enable)
op_errno = error_gen (this, GF_FOP_WRITE);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ if (op_errno == GF_ERROR_SHORT_WRITE) {
+ struct iovec *shortvec;
+
+ /*
+ * A short write error returns some value less than what was
+ * requested from a write. To simulate this, replace the vector
+ * with one half the size;
+ */
+ shortvec = iov_dup(vector, 1);
+ shortvec->iov_len /= 2;
+
+ STACK_WIND(frame, error_gen_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, shortvec, count,
+ off, flags, iobref, xdata);
+ GF_FREE(shortvec);
return 0;
+ } else if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, off, iobref);
- return 0;
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
int
error_gen_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1141,15 +1162,15 @@ error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
@@ -1157,15 +1178,15 @@ int
error_gen_fsync_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
int
-error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1179,29 +1200,29 @@ error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
int
error_gen_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1215,29 +1236,29 @@ error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
int
error_gen_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int
-error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1251,29 +1272,29 @@ error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ loc, fd, xdata);
+ return 0;
}
int
error_gen_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1287,30 +1308,29 @@ error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
int
error_gen_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
-
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1324,31 +1344,30 @@ error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1362,30 +1381,30 @@ error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
- return 0;
+ loc, dict, flags, xdata);
+ return 0;
}
int
error_gen_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1399,31 +1418,103 @@ error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
- return 0;
+ loc, name, xdata);
+ return 0;
+}
+
+int
+error_gen_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+error_gen_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- return 0;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, op_errno, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FGETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FGETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1437,31 +1528,30 @@ error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
- return 0;
+ loc, flags, dict, xdata);
+ return 0;
}
int
error_gen_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
-
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1475,31 +1565,30 @@ error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
- return 0;
+ fd, flags, dict, xdata);
+ return 0;
}
int
error_gen_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1513,30 +1602,66 @@ error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
- return 0;
+ loc, name, xdata);
+ return 0;
+}
+
+int
+error_gen_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FREMOVEXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FREMOVEXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, op_errno, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
}
int
error_gen_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
int
error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1550,32 +1675,31 @@ error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
- return 0;
+ fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1589,32 +1713,31 @@ error_gen_inodelk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_finodelk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1628,32 +1751,31 @@ error_gen_finodelk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_finodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1667,32 +1789,31 @@ error_gen_entrylk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_entrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
int
-error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1706,15 +1827,15 @@ error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fentrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
@@ -1726,8 +1847,7 @@ error_gen_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, char *spec_data)
{
STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
-
- return 0;
+ return 0;
}
@@ -1761,16 +1881,17 @@ error_gen_getspec (call_frame_t *frame, xlator_t *this, const char *key,
int
error_gen_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off)
+ size_t size, off_t off, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1784,30 +1905,31 @@ error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL, xdata);
return 0;
}
STACK_WIND (frame, error_gen_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
+ fd, size, off, xdata);
return 0;
}
int
error_gen_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
+ off_t off, dict_t *dict)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1821,29 +1943,94 @@ error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
+ fd, size, off, dict);
return 0;
}
-
-int
-error_gen_closedir (xlator_t *this, fd_t *fd)
+static void
+error_gen_set_failure (eg_t *pvt, int percent)
{
- return 0;
+ GF_ASSERT (pvt);
+
+ if (percent)
+ pvt->failure_iter_no = 100/percent;
+ else
+ pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
}
+static void
+error_gen_parse_fill_fops (eg_t *pvt, char *enable_fops)
+{
+ char *op_no_str = NULL;
+ int op_no = -1;
+ int i = 0;
+ xlator_t *this = THIS;
+ char *saveptr = NULL;
-int
-error_gen_close (xlator_t *this, fd_t *fd)
+ GF_ASSERT (pvt);
+ GF_ASSERT (this);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 0;
+
+ if (!enable_fops) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "All fops are enabled.");
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 1;
+ } else {
+ op_no_str = strtok_r (enable_fops, ",", &saveptr);
+ while (op_no_str) {
+ op_no = get_fop_int (&op_no_str);
+ if (op_no == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Wrong option value %s", op_no_str);
+ } else
+ pvt->enable[op_no] = 1;
+
+ op_no_str = strtok_r (NULL, ",", &saveptr);
+ }
+ }
+}
+
+int32_t
+error_gen_priv_dump (xlator_t *this)
{
- return 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ eg_t *conf = NULL;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.debug.error-gen.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.debug.error-gen","%s.priv",
+ this->name);
+
+ gf_proc_dump_write("op_count", "%d", conf->op_count);
+ gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
+ gf_proc_dump_write("error_no", "%s", conf->error_no);
+ gf_proc_dump_write("random_failure", "%d", conf->random_failure);
+
+ UNLOCK(&conf->lock);
+out:
+ return ret;
}
int32_t
@@ -1866,18 +2053,43 @@ mem_acct_init (xlator_t *this)
}
int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ int32_t failure_percent_int = 0;
+
+ if (!this || !this->private)
+ goto out;
+
+ pvt = this->private;
+
+ GF_OPTION_RECONF ("error-no", pvt->error_no, options, str, out);
+
+ GF_OPTION_RECONF ("failure", failure_percent_int, options, int32,
+ out);
+
+ GF_OPTION_RECONF ("enable", error_enable_fops, options, str, out);
+
+ GF_OPTION_RECONF ("random-failure", pvt->random_failure, options,
+ bool, out);
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
+}
+
+int
init (xlator_t *this)
{
eg_t *pvt = NULL;
- data_t *error_no = NULL;
- data_t *failure_percent = NULL;
- data_t *enable = NULL;
- gf_boolean_t random_failure = _gf_false;
int32_t ret = 0;
char *error_enable_fops = NULL;
- char *op_no_str = NULL;
- int op_no = -1;
- int i = 0;
int32_t failure_percent_int = 0;
if (!this->children || this->children->next) {
@@ -1892,79 +2104,34 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- error_no = dict_get (this->options, "error-no");
- failure_percent = dict_get (this->options, "failure");
- enable = dict_get (this->options, "enable");
-
pvt = GF_CALLOC (1, sizeof (eg_t), gf_error_gen_mt_eg_t);
if (!pvt) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory.");
ret = -1;
goto out;
}
LOCK_INIT (&pvt->lock);
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- pvt->enable[i] = 0;
- if (!error_no) {
- gf_log (this->name, GF_LOG_DEBUG,
- "error-no not specified.");
- } else {
- pvt->error_no = data_to_str (error_no);
- }
+ GF_OPTION_INIT ("error-no", pvt->error_no, str, out);
- if (!failure_percent) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failure percent not specified.");
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- } else {
- failure_percent_int = data_to_int32 (failure_percent);
- if (failure_percent_int)
- pvt->failure_iter_no = 100/failure_percent_int;
- else
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- }
+ GF_OPTION_INIT ("failure", failure_percent_int, int32, out);
- if (!enable) {
- gf_log (this->name, GF_LOG_WARNING,
- "All fops are enabled.");
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- pvt->enable[i] = 1;
- } else {
- error_enable_fops = data_to_str (enable);
- op_no_str = error_enable_fops;
- while ((*error_enable_fops) != '\0') {
- error_enable_fops++;
- if (((*error_enable_fops) == ',') ||
- ((*error_enable_fops) == '\0')) {
- if ((*error_enable_fops) != '\0') {
- (*error_enable_fops) = '\0';
- error_enable_fops++;
- }
- op_no = get_fop_int (&op_no_str);
- if (op_no == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Wrong option value %s",
- op_no_str);
- } else
- pvt->enable[op_no] = 1;
- op_no_str = error_enable_fops;
- }
- }
- }
+ GF_OPTION_INIT ("enable", error_enable_fops, str, out);
- random_failure = dict_get_str_boolean (this->options, "random-failure",
- _gf_false);
- pvt->random_failure = random_failure;
+ GF_OPTION_INIT ("random-failure", pvt->random_failure, bool, out);
+
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
this->private = pvt;
/* Give some seed value here */
srand (time(NULL));
out:
+ if (ret)
+ GF_FREE (pvt);
return ret;
}
@@ -1986,6 +2153,12 @@ fini (xlator_t *this)
return;
}
+struct xlator_dumpops dumpops = {
+ .priv = error_gen_priv_dump,
+};
+
+struct xlator_fops cbks;
+
struct xlator_fops fops = {
.lookup = error_gen_lookup,
.stat = error_gen_stat,
@@ -2008,6 +2181,9 @@ struct xlator_fops fops = {
.setxattr = error_gen_setxattr,
.getxattr = error_gen_getxattr,
.removexattr = error_gen_removexattr,
+ .fsetxattr = error_gen_fsetxattr,
+ .fgetxattr = error_gen_fgetxattr,
+ .fremovexattr = error_gen_fremovexattr,
.opendir = error_gen_opendir,
.readdir = error_gen_readdir,
.readdirp = error_gen_readdirp,
@@ -2028,24 +2204,29 @@ struct xlator_fops fops = {
.getspec = error_gen_getspec,
};
-struct xlator_cbks cbks = {
- .release = error_gen_close,
- .releasedir = error_gen_closedir,
-};
-
struct volume_options options[] = {
{ .key = {"failure"},
- .type = GF_OPTION_TYPE_INT },
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Percentage failure of operations when enabled.",
+ },
+
{ .key = {"error-no"},
.value = {"ENOENT","ENOTDIR","ENAMETOOLONG","EACCES","EBADF",
"EFAULT","ENOMEM","EINVAL","EIO","EEXIST","ENOSPC",
"EPERM","EROFS","EBUSY","EISDIR","ENOTEMPTY","EMLINK"
"ENODEV","EXDEV","EMFILE","ENFILE","ENOSYS","EINTR",
- "EFBIG","EAGAIN"},
- .type = GF_OPTION_TYPE_STR },
+ "EFBIG","EAGAIN","GF_ERROR_SHORT_WRITE"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
{ .key = {"random-failure"},
- .type = GF_OPTION_TYPE_BOOL},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+
{ .key = {"enable"},
- .type = GF_OPTION_TYPE_STR },
+ .type = GF_OPTION_TYPE_STR,
+ },
+
{ .key = {NULL} }
};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
index bb3adb2ab..d92c23062 100644
--- a/xlators/debug/error-gen/src/error-gen.h
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _ERROR_GEN_H
#define _ERROR_GEN_H
@@ -29,6 +19,18 @@
#define GF_FAILURE_DEFAULT 10
+/*
+ * Pseudo-errors refer to errors beyond the scope of traditional <-1, op_errno>
+ * returns. This facilitates the ability to return unexpected, but not -1 values
+ * and/or to inject operations that lead to implicit error conditions. The range
+ * for pseudo errors resides at a high value to avoid conflicts with the errno
+ * range.
+ */
+enum GF_PSEUDO_ERRORS {
+ GF_ERROR_SHORT_WRITE = 1000, /* short writev return value */
+ GF_ERROR_MAX
+};
+
typedef struct {
int enable[GF_FOP_MAXVALUE];
int op_count;
diff --git a/xlators/debug/io-stats/src/Makefile.am b/xlators/debug/io-stats/src/Makefile.am
index b894e79c3..332d79015 100644
--- a/xlators/debug/io-stats/src/Makefile.am
+++ b/xlators/debug/io-stats/src/Makefile.am
@@ -2,14 +2,15 @@
xlator_LTLIBRARIES = io-stats.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-io_stats_la_LDFLAGS = -module -avoidversion
+io_stats_la_LDFLAGS = -module -avoid-version
io_stats_la_SOURCES = io-stats.c
io_stats_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = io-stats-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
index 2063f6d6a..c30dfb17e 100644
--- a/xlators/debug/io-stats/src/io-stats-mem-types.h
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __IO_STATS_MEM_TYPES_H__
#define __IO_STATS_MEM_TYPES_H__
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index d216cc549..7fb697ae4 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -58,7 +48,7 @@ typedef enum {
IOS_STATS_TYPE_READDIRP,
IOS_STATS_TYPE_READ_THROUGHPUT,
IOS_STATS_TYPE_WRITE_THROUGHPUT,
- IOS_STATS_TYPE_MAX,
+ IOS_STATS_TYPE_MAX
}ios_stats_type_t;
typedef enum {
@@ -293,43 +283,43 @@ is_fop_latency_started (call_frame_t *frame)
} \
UNLOCK (&iosstat->lock); \
ios_stat_add_to_list (&conf->list[type], \
- value, iosstat); \
+ value, iosstat); \
\
} while (0)
#define BUMP_THROUGHPUT(iosstat, type) \
do { \
- struct ios_conf *conf = NULL; \
- double elapsed; \
- struct timeval *begin, *end; \
- double throughput; \
+ struct ios_conf *conf = NULL; \
+ double elapsed; \
+ struct timeval *begin, *end; \
+ double throughput; \
int flag = 0; \
- \
- begin = &frame->begin; \
- end = &frame->end; \
- \
- elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
- + (end->tv_usec - begin->tv_usec); \
- throughput = op_ret / elapsed; \
- \
- conf = this->private; \
- LOCK(&iosstat->lock); \
- { \
- if (iosstat->thru_counters[type].throughput \
+ \
+ begin = &frame->begin; \
+ end = &frame->end; \
+ \
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
+ + (end->tv_usec - begin->tv_usec); \
+ throughput = op_ret / elapsed; \
+ \
+ conf = this->private; \
+ LOCK(&iosstat->lock); \
+ { \
+ if (iosstat->thru_counters[type].throughput \
<= throughput) { \
- iosstat->thru_counters[type].throughput = \
+ iosstat->thru_counters[type].throughput = \
throughput; \
- gettimeofday (&iosstat-> \
+ gettimeofday (&iosstat-> \
thru_counters[type].time, NULL); \
flag = 1; \
} \
} \
- UNLOCK (&iosstat->lock); \
+ UNLOCK (&iosstat->lock); \
if (flag) \
ios_stat_add_to_list (&conf->thru_list[type], \
throughput, iosstat); \
- } while (0)
+ } while (0)
int
ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd)
@@ -484,12 +474,12 @@ ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
new = GF_CALLOC (1, sizeof (*new),
gf_io_stats_mt_ios_stat_list);
new->iosstat = iosstat;
- new->value = value;
+ new->value = value;
ios_stat_ref (iosstat);
- list_add_tail (&new->list, &tmp->list);
+ list_add_tail (&new->list, &tmp->list);
stat = last->iosstat;
last->iosstat = NULL;
- ios_stat_unref (stat);
+ ios_stat_unref (stat);
list_del (&last->list);
GF_FREE (last);
if (reposition == MAX_LIST_MEMBERS)
@@ -511,7 +501,7 @@ ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
list_head->members++;
if (list_head->min_cnt > value)
list_head->min_cnt = value;
- }
+ }
}
out:
UNLOCK (&list_head->lock);
@@ -568,19 +558,16 @@ ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this,
FILE* logfp, ios_stats_type_t type)
{
struct ios_stat_list *entry = NULL;
- struct timeval time = {0, };
- struct tm *tm = NULL;
+ struct timeval time = {0, };
char timestr[256] = {0, };
LOCK (&list_head->lock);
{
list_for_each_entry (entry, &list_head->iosstats->list, list) {
- time = entry->iosstat->thru_counters[type].time;
- tm = localtime (&time.tv_sec);
- if (!tm)
- continue;
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ gf_time_fmt (timestr, sizeof timestr,
+ entry->iosstat->thru_counters[type].time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS, time.tv_usec);
ios_log (this, logfp, "%s \t %-10.2f \t %s",
@@ -600,7 +587,6 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
int index = 0;
struct ios_stat_head *list_head = NULL;
struct ios_conf *conf = NULL;
- struct tm *tm = NULL;
char timestr[256] = {0, };
char str_header[128] = {0};
char str_read[128] = {0};
@@ -694,9 +680,10 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
if (interval == -1) {
LOCK (&conf->lock);
{
- tm = localtime (&conf->cumulative.max_openfd_time.tv_sec);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS,
conf->cumulative.max_openfd_time.tv_usec);
ios_log (this, logfp, "Current open fd's: %"PRId64
@@ -735,13 +722,13 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
"\tFILE NAME");
list_head = &conf->thru_list[IOS_STATS_THRU_READ];
- ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
+ ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
ios_log (this, logfp, "\n======Write Throughput File Stats======");
ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
"\tFILE NAME");
list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
- ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
+ ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
}
return 0;
}
@@ -1080,36 +1067,45 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
struct ios_stat_list *entry = NULL;
int ret = -1;
ios_stats_thru_t index = IOS_STATS_THRU_MAX;
- struct tm *tm = NULL;
char timestr[256] = {0, };
+ char *dict_timestr = NULL;
conf = this->private;
switch (flags) {
- case IOS_STATS_TYPE_OPEN:
+ case IOS_STATS_TYPE_OPEN:
list_head = &conf->list[IOS_STATS_TYPE_OPEN];
LOCK (&conf->lock);
{
ret = dict_set_uint64 (resp, "current-open",
conf->cumulative.nr_opens);
if (ret)
- goto out;
+ goto unlock;
ret = dict_set_uint64 (resp, "max-open",
conf->cumulative.max_nr_opens);
- tm = localtime (&conf->cumulative.max_openfd_time.tv_sec);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
-
- ret = dict_set_str (resp, "max-openfd-time",
- timestr);
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ if (conf->cumulative.max_openfd_time.tv_sec)
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS,
+ conf->cumulative.max_openfd_time.tv_usec);
+
+ dict_timestr = gf_strdup (timestr);
+ if (!dict_timestr)
+ goto unlock;
+ ret = dict_set_dynstr (resp, "max-openfd-time",
+ dict_timestr);
if (ret)
- goto out;
+ goto unlock;
}
+ unlock:
UNLOCK (&conf->lock);
-
+ /* Do not proceed if we came here because of some error
+ * during the dict operation */
+ if (ret)
+ goto out;
break;
case IOS_STATS_TYPE_READ:
list_head = &conf->list[IOS_STATS_TYPE_READ];
@@ -1125,7 +1121,7 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
break;
case IOS_STATS_TYPE_READ_THROUGHPUT:
list_head = &conf->thru_list[IOS_STATS_THRU_READ];
- index = IOS_STATS_THRU_READ;
+ index = IOS_STATS_THRU_READ;
break;
case IOS_STATS_TYPE_WRITE_THROUGHPUT:
list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
@@ -1146,39 +1142,44 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
snprintf (key, 256, "%s-%d", "filename", cnt);
ret = dict_set_str (resp, key, entry->iosstat->filename);
if (ret)
- goto out;
+ goto unlock_list_head;
snprintf (key, 256, "%s-%d", "value",cnt);
ret = dict_set_uint64 (resp, key, entry->value);
if (ret)
- goto out;
+ goto unlock_list_head;
if (index != IOS_STATS_THRU_MAX) {
snprintf (key, 256, "%s-%d", "time-sec", cnt);
- ret = dict_set_int32 (resp, key,
+ ret = dict_set_int32 (resp, key,
entry->iosstat->thru_counters[index].time.tv_sec);
if (ret)
- goto out;
+ goto unlock_list_head;
snprintf (key, 256, "%s-%d", "time-usec", cnt);
- ret = dict_set_int32 (resp, key,
+ ret = dict_set_int32 (resp, key,
entry->iosstat->thru_counters[index].time.tv_usec);
if (ret)
- goto out;
+ goto unlock_list_head;
}
if (cnt == list_cnt)
break;
}
}
+unlock_list_head:
UNLOCK (&list_head->lock);
-
+ /* ret is !=0 if some dict operation in the above critical region
+ * failed. */
+ if (ret)
+ goto out;
ret = dict_set_int32 (resp, "members", cnt);
out:
return ret;
}
+
int
io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
struct ios_fd *iosfd = NULL;
char *path = NULL;
@@ -1231,21 +1232,21 @@ io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unwind:
UPDATE_PROFILE_STATS (frame, CREATE);
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct ios_fd *iosfd = NULL;
char *path = NULL;
struct ios_stat *iosstat = NULL;
struct ios_conf *conf = NULL;
- conf = this->private;
+ conf = this->private;
path = frame->local;
frame->local = NULL;
@@ -1269,6 +1270,16 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ios_fd_ctx_set (fd, this, iosfd);
ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!iosstat) {
+ iosstat = GF_CALLOC (1, sizeof (*iosstat),
+ gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ iosstat->filename = gf_strdup (path);
+ uuid_copy (iosstat->gfid, fd->inode->gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
+ }
+ }
LOCK (&conf->lock);
{
@@ -1286,7 +1297,7 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unwind:
UPDATE_PROFILE_STATS (frame, OPEN);
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -1294,10 +1305,10 @@ unwind:
int
io_stats_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, STAT);
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
@@ -1306,7 +1317,7 @@ int
io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *buf, struct iobref *iobref)
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
{
int len = 0;
fd_t *fd = NULL;
@@ -1330,7 +1341,7 @@ io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
+ vector, count, buf, iobref, xdata);
return 0;
}
@@ -1339,7 +1350,7 @@ io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
inode_t *inode = NULL;
@@ -1351,13 +1362,13 @@ io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ios_inode_ctx_get (inode, this, &iosstat);
if (iosstat) {
BUMP_STATS (iosstat, IOS_STATS_TYPE_WRITE);
- BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
inode = NULL;
iosstat = NULL;
}
}
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
@@ -1367,7 +1378,7 @@ io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
inode_t *inode = frame->local;
@@ -1383,17 +1394,17 @@ io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
iosstat = NULL;
}
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, READDIR);
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
@@ -1401,10 +1412,10 @@ io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSYNC);
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
@@ -1412,10 +1423,10 @@ io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SETATTR);
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
return 0;
}
@@ -1423,11 +1434,11 @@ io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, UNLINK);
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1437,12 +1448,12 @@ int
io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, RENAME);
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent,
- prenewparent, postnewparent);
+ prenewparent, postnewparent, xdata);
return 0;
}
@@ -1450,10 +1461,10 @@ io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *buf,
- struct iatt *sbuf)
+ struct iatt *sbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, READLINK);
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
return 0;
}
@@ -1462,10 +1473,10 @@ int
io_stats_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
UPDATE_PROFILE_STATS (frame, LOOKUP);
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xattr,
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
}
@@ -1475,11 +1486,11 @@ int
io_stats_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SYMLINK);
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1488,11 +1499,11 @@ int
io_stats_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, MKNOD);
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1501,7 +1512,8 @@ int
io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
char *path = frame->local;
@@ -1519,8 +1531,11 @@ io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
unwind:
+ /* local is assigned with path */
+ GF_FREE (frame->local);
+ frame->local = NULL;
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1529,28 +1544,28 @@ int
io_stats_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, LINK);
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
io_stats_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FLUSH);
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
int ret = -1;
@@ -1566,7 +1581,7 @@ io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
BUMP_STATS (iosstat, IOS_STATS_TYPE_OPENDIR);
unwind:
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -1574,13 +1589,13 @@ unwind:
int
io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, RMDIR);
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1588,71 +1603,100 @@ io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, TRUNCATE);
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
int
io_stats_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, STATFS);
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
io_stats_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SETXATTR);
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, GETXATTR);
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, REMOVEXATTR);
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+io_stats_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FSETXATTR);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FGETXATTR);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FREMOVEXATTR);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSYNCDIR);
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, ACCESS);
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -1660,92 +1704,126 @@ io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FTRUNCATE);
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
int
io_stats_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSTAT);
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
+io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, FALLOCATE);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+
+int
+io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, DISCARD);
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, ZEROFILL);
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, LK);
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
int
io_stats_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, ENTRYLK);
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, XATTROP);
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FXATTROP);
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, INODELK);
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
int
io_stats_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
@@ -1753,128 +1831,122 @@ io_stats_inodelk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
int
io_stats_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FINODELK);
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
-io_stats_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+io_stats_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
int
-io_stats_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+io_stats_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
-
+ loc, flags, dict, xdata);
return 0;
}
int
-io_stats_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+io_stats_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
-
+ fd, flags, dict, xdata);
return 0;
}
int
io_stats_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
+ loc, xdata);
return 0;
}
int
-io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
-
+ loc, xdata);
return 0;
}
int
io_stats_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
-
+ loc, size, xdata);
return 0;
}
int
-io_stats_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
+io_stats_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev, params);
-
+ loc, mode, dev, umask, xdata);
return 0;
}
int
io_stats_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -1883,117 +1955,112 @@ io_stats_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
int
io_stats_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
int
io_stats_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
-
+ loc, flags, xdata);
return 0;
}
int
-io_stats_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+io_stats_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
-
+ linkpath, loc, umask, xdata);
return 0;
}
int
io_stats_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
-
+ oldloc, newloc, xdata);
return 0;
}
int
io_stats_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
int
io_stats_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *stbuf, int32_t valid)
+ loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
-
+ loc, stbuf, valid, xdata);
return 0;
}
int
io_stats_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
-
+ loc, offset, xdata);
return 0;
}
int
-io_stats_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd, int32_t wbflags)
+io_stats_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -2002,7 +2069,7 @@ io_stats_open (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
@@ -2010,7 +2077,7 @@ io_stats_open (call_frame_t *frame, xlator_t *this,
int
io_stats_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -2019,14 +2086,14 @@ io_stats_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
int
io_stats_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
frame->local = fd;
@@ -2035,7 +2102,7 @@ io_stats_readv (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -2044,7 +2111,7 @@ int
io_stats_writev (call_frame_t *frame, xlator_t *this,
fd_t *fd, struct iovec *vector,
int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int len = 0;
@@ -2052,14 +2119,13 @@ io_stats_writev (call_frame_t *frame, xlator_t *this,
frame->local = fd->inode;
len = iov_length (vector, count);
-
BUMP_WRITE (fd, len);
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
@@ -2067,47 +2133,47 @@ io_stats_writev (call_frame_t *frame, xlator_t *this,
int
io_stats_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
+ loc, xdata);
return 0;
}
int
io_stats_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
int
io_stats_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags)
+ fd_t *fd, int32_t flags, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
+ fd, flags, xdata);
return 0;
}
-void
+int
conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
{
struct {
@@ -2131,27 +2197,27 @@ conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
if (!strncmp (filename, "", 1)) {
gf_log (this->name, GF_LOG_ERROR, "No filename given");
- return;
+ return -1;
}
logfp = fopen (filename, "w+");
- GF_ASSERT (logfp);
if (!logfp) {
gf_log (this->name, GF_LOG_ERROR, "failed to open %s "
"for writing", filename);
- return;
+ return -1;
}
(void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE,
logfp);
io_stats_dump (this, &args);
fclose (logfp);
}
+ return 0;
}
int
io_stats_setxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
struct {
xlator_t *this;
@@ -2170,43 +2236,85 @@ io_stats_setxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
int
io_stats_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
int
io_stats_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
+ loc, name, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+ STACK_WIND (frame, io_stats_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
return 0;
}
int
io_stats_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
@@ -2214,13 +2322,13 @@ io_stats_opendir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
frame->local = fd->inode;
START_FOP_LATENCY (frame);
@@ -2228,108 +2336,144 @@ io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
STACK_WIND (frame, io_stats_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset);
-
+ fd, size, offset, dict);
return 0;
}
int
io_stats_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, offset);
-
+ fd, size, offset, xdata);
return 0;
}
int
io_stats_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
int
io_stats_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
}
int
io_stats_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
-
+ fd, offset, xdata);
return 0;
}
int
io_stats_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid)
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
int
io_stats_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
+}
+
+
+int
+io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int
+io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
return 0;
}
int
io_stats_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
+ fd, cmd, lock, xdata);
return 0;
}
@@ -2346,7 +2490,7 @@ io_stats_release (xlator_t *this, fd_t *fd)
LOCK (&conf->lock);
{
- conf->cumulative.nr_opens--;
+ conf->cumulative.nr_opens--;
}
UNLOCK (&conf->lock);
@@ -2354,8 +2498,7 @@ io_stats_release (xlator_t *this, fd_t *fd)
if (iosfd) {
io_stats_dump_fd (this, iosfd);
- if (iosfd->filename)
- GF_FREE (iosfd->filename);
+ GF_FREE (iosfd->filename);
GF_FREE (iosfd);
}
@@ -2380,6 +2523,93 @@ io_stats_forget (xlator_t *this, inode_t *inode)
return 0;
}
+static int
+ios_init_top_stats (struct ios_conf *conf)
+{
+ int i = 0;
+
+ GF_ASSERT (conf);
+
+ for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
+ conf->list[i].iosstats = GF_CALLOC (1,
+ sizeof(*conf->list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->list[i].iosstats->list);
+ LOCK_INIT (&conf->list[i].lock);
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
+ conf->thru_list[i].iosstats = GF_CALLOC (1,
+ sizeof (*conf->thru_list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->thru_list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
+ LOCK_INIT (&conf->thru_list[i].lock);
+ }
+
+ return 0;
+}
+
+static void
+ios_destroy_top_stats (struct ios_conf *conf)
+{
+ int i = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *list = NULL;
+ struct ios_stat *stat = NULL;
+
+ GF_ASSERT (conf);
+
+ LOCK (&conf->lock);
+
+ conf->cumulative.nr_opens = 0;
+ conf->cumulative.max_nr_opens = 0;
+ conf->cumulative.max_openfd_time.tv_sec = 0;
+ conf->cumulative.max_openfd_time.tv_usec = 0;
+
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
+ list_head = &conf->list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
+ list_head = &conf->thru_list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
+ }
+
+ UNLOCK (&conf->lock);
+
+ return;
+}
int
reconfigure (xlator_t *this, dict_t *options)
@@ -2447,7 +2677,6 @@ int
init (xlator_t *this)
{
struct ios_conf *conf = NULL;
- int i = 0;
char *sys_log_str = NULL;
int sys_log_level = -1;
char *log_str = NULL;
@@ -2484,35 +2713,9 @@ init (xlator_t *this)
gettimeofday (&conf->cumulative.started_at, NULL);
gettimeofday (&conf->incremental.started_at, NULL);
- for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
- conf->list[i].iosstats = GF_CALLOC (1,
- sizeof(*conf->list[i].iosstats),
- gf_io_stats_mt_ios_stat);
-
- if (!conf->list[i].iosstats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
-
- INIT_LIST_HEAD(&conf->list[i].iosstats->list);
- LOCK_INIT (&conf->list[i].lock);
- }
-
- for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
- conf->thru_list[i].iosstats = GF_CALLOC (1,
- sizeof (*conf->thru_list[i].iosstats),
- gf_io_stats_mt_ios_stat);
-
- if (!conf->thru_list[i].iosstats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
-
- INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
- LOCK_INIT (&conf->thru_list[i].lock);
- }
+ ret = ios_init_top_stats (conf);
+ if (ret)
+ return -1;
GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out);
@@ -2544,12 +2747,6 @@ void
fini (xlator_t *this)
{
struct ios_conf *conf = NULL;
- struct ios_stat_head *list_head = NULL;
- struct ios_stat_list *entry = NULL;
- struct ios_stat_list *tmp = NULL;
- struct ios_stat_list *list = NULL;
- struct ios_stat *stat = NULL;
- int i = 0;
if (!this)
return;
@@ -2560,45 +2757,15 @@ fini (xlator_t *this)
return;
this->private = NULL;
- for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
- list_head = &conf->list[i];
- if (!list_head)
- continue;
- list_for_each_entry_safe (entry, tmp,
- &list_head->iosstats->list, list) {
- list = entry;
- stat = list->iosstat;
- ios_stat_unref (stat);
- list_del (&list->list);
- if (list)
- GF_FREE (list);
- }
- }
+ ios_destroy_top_stats (conf);
- for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
- list_head = &conf->thru_list[i];
- if (!list_head)
- continue;
- list_for_each_entry_safe (entry, tmp,
- &list_head->iosstats->list, list) {
- list = entry;
- stat = list->iosstat;
- ios_stat_unref (stat);
- list_del (&list->list);
- if (list)
- GF_FREE (list);
- }
- }
-
- if (conf)
- GF_FREE(conf);
+ GF_FREE(conf);
gf_log (this->name, GF_LOG_INFO,
"io-stats translator unloaded");
return;
}
-
int
notify (xlator_t *this, int32_t event, void *data, ...)
{
@@ -2618,6 +2785,28 @@ notify (xlator_t *this, int32_t event, void *data, ...)
va_end (ap);
switch (event) {
case GF_EVENT_TRANSLATOR_INFO:
+ ret = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+ if (ret) {
+ ret = dict_set_int32 (output, "top-op", top_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set top-op in dict");
+ goto out;
+ }
+ ios_destroy_top_stats (this->private);
+ ret = ios_init_top_stats (this->private);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to reset top stats");
+ ret = dict_set_int32 (output, "stats-cleared",
+ ret ? 0 : 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ goto out;
+ }
+
ret = dict_get_int32 (dict, "top-op", &top_op);
if (!ret) {
ret = dict_get_int32 (dict, "list-cnt", &list_cnt);
@@ -2681,6 +2870,9 @@ struct xlator_fops fops = {
.setxattr = io_stats_setxattr,
.getxattr = io_stats_getxattr,
.removexattr = io_stats_removexattr,
+ .fsetxattr = io_stats_fsetxattr,
+ .fgetxattr = io_stats_fgetxattr,
+ .fremovexattr = io_stats_fremovexattr,
.opendir = io_stats_opendir,
.readdir = io_stats_readdir,
.readdirp = io_stats_readdirp,
@@ -2698,6 +2890,9 @@ struct xlator_fops fops = {
.fxattrop = io_stats_fxattrop,
.setattr = io_stats_setattr,
.fsetattr = io_stats_fsetattr,
+ .fallocate = io_stats_fallocate,
+ .discard = io_stats_discard,
+ .zerofill = io_stats_zerofill,
};
struct xlator_cbks cbks = {
@@ -2741,7 +2936,7 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.default_value = "CRITICAL",
.description = "Gluster's syslog log-level",
- .value = { "WARNING", "ERROR", "CRITICAL"}
+ .value = { "WARNING", "ERROR", "INFO", "CRITICAL"}
},
{ .key = {"brick-log-level"},
.type = GF_OPTION_TYPE_STR,
diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am
index 0f1679a04..7b2597b4d 100644
--- a/xlators/debug/trace/src/Makefile.am
+++ b/xlators/debug/trace/src/Makefile.am
@@ -2,13 +2,15 @@
xlator_LTLIBRARIES = trace.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-trace_la_LDFLAGS = -module -avoidversion
+trace_la_LDFLAGS = -module -avoid-version
trace_la_SOURCES = trace.c
trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = trace.h trace-mem-types.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
new file mode 100644
index 000000000..9fa7d97c2
--- /dev/null
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __TRACE_MEM_TYPES_H__
+#define __TRACE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_trace_mem_types_ {
+ gf_trace_mt_trace_conf_t = gf_common_mt_end + 1,
+ gf_trace_mt_end
+};
+#endif
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 88e413c1d..c9d839356 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -1,26 +1,15 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "trace.h"
+#include "trace-mem-types.h"
/**
* xlators/debug/trace :
@@ -29,1203 +18,1388 @@
* Very helpful translator for debugging.
*/
-#include <time.h>
-#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
-
-
-struct {
- char *name;
- int enabled;
-} trace_fop_names[GF_FOP_MAXVALUE];
-
-int trace_log_level = GF_LOG_INFO;
-
-static char *
-trace_stat_to_str (struct iatt *buf)
+int
+dump_history_trace (circular_buffer_t *cb, void *data)
{
- char *statstr = NULL;
- char atime_buf[256] = {0,};
- char mtime_buf[256] = {0,};
- char ctime_buf[256] = {0,};
- int asprint_ret_value = 0;
- uint64_t ia_time = 0;
-
- if (!buf) {
- statstr = NULL;
- goto out;
- }
-
- ia_time = buf->ia_atime;
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
-
- ia_time = buf->ia_mtime;
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
+ char *string = NULL;
+ struct tm *tm = NULL;
+ char timestr[256] = {0,};
- ia_time = buf->ia_ctime;
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
+ string = (char *)cb->data;
+ tm = localtime (&cb->tv.tv_sec);
- asprint_ret_value = gf_asprintf (&statstr,
- "gfid=%s ino=%"PRIu64", mode=%o, "
- "nlink=%"GF_PRI_NLINK", uid=%u, "
- "gid=%u, size=%"PRIu64", "
- "blocks=%"PRIu64", atime=%s, "
- "mtime=%s, ctime=%s",
- uuid_utoa (buf->ia_gfid), buf->ia_ino,
- st_mode_from_ia (buf->ia_prot,
- buf->ia_type),
- buf->ia_nlink, buf->ia_uid,
- buf->ia_gid, buf->ia_size,
- buf->ia_blocks, atime_buf,
- mtime_buf, ctime_buf);
+ /* Since we are continuing with adding entries to the buffer even when
+ gettimeofday () fails, it's safe to check tm and then dump the time
+ at which the entry was added to the buffer */
+ if (tm) {
+ strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_proc_dump_write ("TIME", "%s", timestr);
+ }
- if (asprint_ret_value < 0)
- statstr = NULL;
+ gf_proc_dump_write ("FOP", "%s\n", string);
-out:
- return statstr;
+ return 0;
}
-
int
trace_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
- "*stbuf {%s}, *preparent {%s}, *postparent = "
- "{%s})", frame->root->unique,
- uuid_utoa (inode->gfid), op_ret, fd,
- statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (preparentstr)
- GF_FREE (preparentstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
+ "*stbuf {%s}, *preparent {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid), op_ret, fd,
+ statstr, preparentstr, postparentstr);
/* for 'release' log */
fd_ctx_set (fd, this, 0);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, *fd=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, "
+ "*fd=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ fd);
+
+ LOG_ELEMENT (conf, string);
}
+out:
/* for 'release' log */
if (op_ret >= 0)
fd_ctx_set (fd, this, 0);
- frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ TRACE_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
int
trace_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STAT].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *buf, struct iobref *iobref)
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- buf, iobref);
+out:
+ TRACE_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+ buf, iobref, xdata);
return 0;
}
-
int
trace_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
-
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
-
int
trace_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s}",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
int
trace_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_SETATTR].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (statpre);
- postopstr = trace_stat_to_str (statpost);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, statpost);
+out:
+ TRACE_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
return 0;
}
-
int
trace_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (statpre);
- postopstr = trace_stat_to_str (statpost);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
- statpre, statpost);
+out:
+ TRACE_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
return 0;
}
-
int
trace_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (frame->local), op_ret, preparentstr,
- postparentstr);
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ " *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preoldparentstr = NULL;
- char *postoldparentstr = NULL;
- char *prenewparentstr = NULL;
- char *postnewparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preoldparentstr = trace_stat_to_str (preoldparent);
- postoldparentstr = trace_stat_to_str (postoldparent);
-
- prenewparentstr = trace_stat_to_str (prenewparent);
- postnewparentstr = trace_stat_to_str (postnewparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- "*preoldparent = {%s}, *postoldparent = {%s}"
- " *prenewparent = {%s}, *postnewparent = {%s})",
- frame->root->unique, op_ret, statstr,
- preoldparentstr, postoldparentstr,
- prenewparentstr, postnewparentstr);
+ char statstr[4096] = {0, };
+ char preoldparentstr[4096] = {0, };
+ char postoldparentstr[4096] = {0, };
+ char prenewparentstr[4096] = {0, };
+ char postnewparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preoldparentstr)
- GF_FREE (preoldparentstr);
+ conf = this->private;
- if (postoldparentstr)
- GF_FREE (postoldparentstr);
-
- if (prenewparentstr)
- GF_FREE (prenewparentstr);
-
- if (postnewparentstr)
- GF_FREE (postnewparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preoldparent, preoldparentstr);
+ trace_stat_to_str (postoldparent, postoldparentstr);
+ trace_stat_to_str (prenewparent, prenewparentstr);
+ trace_stat_to_str (postnewparent, postnewparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *preoldparent = {%s},"
+ " *postoldparent = {%s}"
+ " *prenewparent = {%s}, "
+ "*postnewparent = {%s})",
+ frame->root->unique, op_ret, statstr,
+ preoldparentstr, postoldparentstr,
+ prenewparentstr, postnewparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
+out:
+ TRACE_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
return 0;
}
-
int
trace_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *stbuf)
+ const char *buf, struct iatt *stbuf, dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {0,};
if (op_ret == 0) {
- statstr = trace_stat_to_str (stbuf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s, "
- "stbuf = { %s })",
- frame->root->unique, op_ret, op_errno, buf,
- statstr);
- } else
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ trace_stat_to_str (stbuf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d,"
+ "buf=%s, stbuf = { %s })",
+ frame->root->unique, op_ret, op_errno,
+ buf, statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
- if (statstr)
- GF_FREE (statstr);
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, stbuf);
+out:
+ TRACE_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, stbuf,
+ xdata);
return 0;
}
-
int
trace_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
- char *statstr = NULL;
- char *postparentstr = NULL;
+ char statstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*buf {%s}, *postparent {%s}",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (postparent, postparentstr);
+ /* print buf->ia_gfid instead of inode->gfid,
+ * since if the inode is not yet linked to the
+ * inode table (fresh lookup) then null gfid
+ * will be printed.
+ */
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*buf {%s}, *postparent {%s}",
+ frame->root->unique,
+ uuid_utoa (buf->ia_gfid),
+ op_ret, statstr, postparentstr);
/* For 'forget' */
inode_ctx_put (inode, this, 0);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- xattr, postparent);
+out:
+ TRACE_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
return 0;
}
-
int
trace_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (postparentstr)
- GF_FREE (postparentstr);
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, op_errno=%d",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- ", *stbuf = {%s}, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ ", *stbuf = {%s}, *prebuf = {%s}, "
+ "*postbuf = {%s} )",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- " *prebuf = {%s}, *postbuf = {%s})",
- frame->root->unique, op_ret,
- statstr, preparentstr, postparentstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (statstr)
- GF_FREE (statstr);
+ conf = this->private;
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *prebuf = {%s},"
+ " *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ statstr, preparentstr, postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, fd=%p",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno, fd);
- }
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " fd=%p",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno, fd);
+ LOG_ELEMENT (conf, string);
+ }
+out:
/* for 'releasedir' log */
if (op_ret >= 0)
fd_ctx_set (fd, this, 0);
- frame->local = NULL;
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ TRACE_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
int
trace_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, preparentstr, postparentstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "*prebuf={%s}, *postbuf={%s}",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr, postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret, preopstr,
- postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
-
int
trace_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STATFS].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK
- ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", "
- "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%"
- GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d",
- frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks,
- buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree,
- buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": ({f_bsize=%lu, "
+ "f_frsize=%lu, "
+ "f_blocks=%"GF_PRI_FSBLK
+ ", f_bfree=%"GF_PRI_FSBLK", "
+ "f_bavail=%"GF_PRI_FSBLK", "
+ "f_files=%"GF_PRI_FSBLK", "
+ "f_ffree=%"GF_PRI_FSBLK", "
+ "f_favail=%"GF_PRI_FSBLK", "
+ "f_fsid=%lu, f_flag=%lu, "
+ "f_namemax=%lu}) => ret=%d",
+ frame->root->unique, buf->f_bsize,
+ buf->f_frsize, buf->f_blocks,
+ buf->f_bfree, buf->f_bavail,
+ buf->f_files, buf->f_ffree,
+ buf->f_favail, buf->f_fsid,
+ buf->f_flag, buf->f_namemax, op_ret);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, dict=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno, dict);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
- frame->local = NULL;
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
trace_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, dict=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno, dict);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
trace_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *prebufstr = NULL;
- char *postbufstr = NULL;
-
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- if (op_ret >= 0) {
- prebufstr = trace_stat_to_str (prebuf);
- postbufstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret,
- prebufstr, postbufstr);
+ char prebufstr[4096] = {0, };
+ char postbufstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (prebufstr)
- GF_FREE (prebufstr);
-
- if (postbufstr)
- GF_FREE (postbufstr);
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, prebufstr);
+ trace_stat_to_str (postbuf, postbufstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ prebufstr, postbufstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
int
trace_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char string[4096] = {0.};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d "
+ "buf=%s", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "{l_type=%d, l_whence=%d, "
+ "l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len,
+ lock->l_pid);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
- }
- frame->local = NULL;
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
-
-
int
trace_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
int
trace_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-
int
trace_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-
int
trace_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
}
- frame->local = NULL;
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum);
+out:
+ TRACE_STACK_UNWIND (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
return 0;
}
@@ -1235,35 +1409,54 @@ trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
trace_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (path=%s basename=%s, "
- "cmd=%s, type=%s)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- volume, loc->path, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ volume, loc->path, basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
-
int
trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- char *cmd_str = NULL;
- char *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {0,};
switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
@@ -1306,32 +1499,43 @@ trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
break;
}
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (path=%s "
- "cmd=%s, type=%s, start=%llu, len=%llu, pid=%llu)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- volume, loc->path,
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), volume,
+ loc->path, cmd_str, type_str,
+ (unsigned long long)flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
-
int
trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- char *cmd_str = NULL, *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {0,};
switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
@@ -1374,233 +1578,351 @@ trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
break;
}
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (fd =%p "
- "cmd=%s, type=%s, start=%llu, len=%llu, pid=%llu)",
- frame->root->unique, uuid_utoa (fd->inode->gfid), volume, fd,
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd =%p "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ cmd_str, type_str,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
frame->local = fd->inode->gfid;
- }
+ LOG_ELEMENT (conf, string);
+ }
+out:
STACK_WIND (frame, trace_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
-
int
trace_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (path=%s flags=%d)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (path=%s flags=%d)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
+ loc, flags, dict, xdata);
return 0;
}
-
int
trace_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, flags=%d",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
+ fd, flags, dict, xdata);
return 0;
}
-
int
trace_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
/* TODO: print all the keys mentioned in xattr_req */
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
+ loc, xdata);
return 0;
}
-
int
-trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STAT].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
+ loc, xdata);
return 0;
}
-
int
-trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, size=%"GF_PRI_SIZET")",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, size);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "size=%"GF_PRI_SIZET")", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ size);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
}
-
int
trace_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t dev, dict_t *params)
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%d dev=%"GF_PRI_DEV")",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mode, dev);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d "
+ "umask=0%o, dev=%"GF_PRI_DEV")",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask, dev);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev, params);
+ loc, mode, dev, umask, xdata);
return 0;
}
-
int
trace_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mode);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
-
int
-trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flag=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ xflag);
+
frame->local = loc->inode->gfid;
- }
+ LOG_ELEMENT (conf, string);
+ }
+out:
STACK_WIND (frame, trace_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
-
int
-trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
+ loc, flags, xdata);
return 0;
}
-
int
trace_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s linkpath=%s, path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- linkpath, loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s linkpath=%s, path=%s"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), linkpath,
+ loc->path, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
+ linkpath, loc, umask, xdata);
return 0;
}
-
int
-trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- char oldgfid[50] = {0,};
- char newgfid[50] = {0,};
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
if (newloc->inode)
uuid_utoa_r (newloc->inode->gfid, newgfid);
else
@@ -1608,29 +1930,40 @@ trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
uuid_utoa_r (oldloc->inode->gfid, oldgfid);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": oldgfid=%s oldpath=%s --> newgfid=%s newpath=%s",
- frame->root->unique, oldgfid, oldloc->path, newgfid, newloc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s",
+ frame->root->unique, oldgfid,
+ oldloc->path, newgfid, newloc->path);
frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
-
int
-trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- char oldgfid[50] = {0,};
- char newgfid[50] = {0,};
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LINK].enabled) {
+ char string[4096] = {0,};
if (newloc->inode)
uuid_utoa_r (newloc->inode->gfid, newgfid);
else
@@ -1638,42 +1971,63 @@ trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
uuid_utoa_r (oldloc->inode->gfid, oldgfid);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": oldgfid=%s oldpath=%s --> newgfid=%s newpath=%s",
- frame->root->unique, oldgfid, oldloc->path,
- newgfid, newloc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s", frame->root->unique,
+ oldgfid, oldloc->path, newgfid,
+ newloc->path);
+
frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
-
int
trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ia_time = 0;
- char actime_str[256] = {0,};
- char modtime_str[256] = {0,};
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%o)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%o)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s uid=%o, gid=%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, stbuf->ia_uid, stbuf->ia_gid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s uid=%o,"
+ " gid=%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, stbuf->ia_uid,
+ stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
@@ -1685,44 +2039,64 @@ trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
localtime ((time_t *)&ia_time));
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s ia_atime=%s, ia_mtime=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, actime_str, modtime_str);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
frame->local = loc->inode->gfid;
}
+out:
STACK_WIND (frame, trace_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
-
int
trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ia_time = 0;
- char actime_str[256] = {0,};
- char modtime_str[256] = {0,};
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, mode=%o",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, mode=%o",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, uid=%o, gid=%o",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, stbuf->ia_uid, stbuf->ia_gid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, uid=%o, "
+ "gid=%o", frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, stbuf->ia_uid, stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
@@ -1734,353 +2108,552 @@ trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
localtime ((time_t *)&ia_time));
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p ia_atime=%s, ia_mtime=%s",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, actime_str, modtime_str);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
frame->local = fd->inode->gfid;
}
+out:
STACK_WIND (frame, trace_fsetattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
-
int
trace_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, offset=%"PRId64"",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "offset=%"PRId64"", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ offset);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
+ loc, offset, xdata);
return 0;
}
-
int
trace_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d fd=%p wbflags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags, fd, wbflags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags, fd);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
-
int
trace_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, fd=%p, flags=0%o mode=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, fd, flags, mode);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, fd=%p, "
+ "flags=0%o mode=0%o umask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ fd, flags, mode, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
-
int
trace_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READ].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"
+ GF_PRI_SIZET"offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
-
int
trace_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t offset, struct iobref *iobref)
+ off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_WRITE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, count=%d, offset=%"PRId64")",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, count, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, count=%d, "
+ " offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, count,
+ offset, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
-
int
-trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STATFS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, (loc->inode)?
- uuid_utoa (loc->inode->gfid):"0", loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique, (loc->inode)?
+ uuid_utoa (loc->inode->gfid):"0", loc->path);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
+ loc, xdata);
return 0;
}
-
int
-trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
-
int
-trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s flags=%d fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), flags, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), flags, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
+ fd, flags, xdata);
return 0;
}
-
int
trace_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
-
int
trace_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s name=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
-
int
trace_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s name=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
-
int
-trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s fd=%p",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path, fd);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
trace_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64,
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64" dict=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, dict);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset);
+ fd, size, offset, dict);
return 0;
}
-
int
trace_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64,
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64,
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, offset);
+ fd, size, offset, xdata);
return 0;
}
-
int
trace_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s datasync=%d fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- datasync, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s datasync=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), datasync, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
-
int
-trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mask=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mask);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, mask);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
}
-
int32_t
trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len)
+ int32_t len, dict_t *xdata)
{
+
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s offset=%"PRId64" len=%u fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- offset, len, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64
+ "len=%u fd=%p", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, len, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rchecksum_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rchecksum,
- fd, offset, len);
+ fd, offset, len, xdata);
return 0;
@@ -2089,162 +2662,264 @@ trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int32_t
trace_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
+ entrylk_type type, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (fd=%p basename=%s, "
- "cmd=%s, type=%s)",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- volume, fd, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd=%p "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fentrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
return 0;
}
int32_t
trace_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p name=%s",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p name=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, name);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fgetxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr,
- fd, name);
+ fd, name, xdata);
return 0;
}
int32_t
trace_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p flags=%d",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsetxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags);
+ fd, dict, flags, xdata);
return 0;
}
int
trace_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- offset, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
+ fd, offset, xdata);
return 0;
}
-
int
-trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
return 0;
}
-
int
trace_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *lock)
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd,
- cmd, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, cmd=%d, "
+ "lock {l_type=%d, "
+ "l_whence=%d, l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, cmd,
+ lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len, lock->l_pid);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
+ fd, cmd, lock, xdata);
return 0;
}
int32_t
trace_forget (xlator_t *this, inode_t *inode)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
/* If user want to understand when a lookup happens,
he should know about 'forget' too */
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s", uuid_utoa (inode->gfid));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s", uuid_utoa (inode->gfid));
+
+ LOG_ELEMENT (conf, string);
}
+
+out:
return 0;
}
-
int32_t
trace_releasedir (xlator_t *this, fd_t *fd)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s fd=%p", uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
}
+out:
return 0;
}
int32_t
trace_release (xlator_t *this, fd_t *fd)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled ||
trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s fd=%p", uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
}
+
+out:
return 0;
}
-
void
enable_all_calls (int enabled)
{
@@ -2254,7 +2929,6 @@ enable_all_calls (int enabled)
trace_fop_names[i].enabled = enabled;
}
-
void
enable_call (const char *name, int enabled)
{
@@ -2282,6 +2956,105 @@ process_call_list (const char *list, int include)
}
}
+int32_t
+trace_dump_history (xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
+ trace_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("trace", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->history, out);
+
+ conf = this->private;
+ // Is it ok to return silently if log-history option his off?
+ if (conf && conf->log_history == _gf_true) {
+ gf_proc_dump_build_key (key_prefix, "xlator.debug.trace",
+ "history");
+ gf_proc_dump_add_section (key_prefix);
+ eh_dump (this->history, NULL, dump_history_trace);
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_trace_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ trace_conf_t *conf = NULL;
+ char *includes = NULL, *excludes = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+
+ conf = this->private;
+
+ includes = data_to_str (dict_get (options, "include-ops"));
+ excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
+ trace_fop_names[i].enabled = 1;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ goto out;
+ }
+
+ if (includes)
+ process_call_list (includes, 1);
+ if (excludes)
+ process_call_list (excludes, 0);
+
+ /* Should resizing of the event-history be allowed in reconfigure?
+ * for which a new event_history might have to be allocated and the
+ * older history has to be freed.
+ */
+ GF_OPTION_RECONF ("log-file", conf->log_file, options, bool, out);
+
+ GF_OPTION_RECONF ("log-history", conf->log_history, options, bool, out);
+
+ ret = 0;
+
+out:
+ return ret;
+}
int32_t
init (xlator_t *this)
@@ -2289,6 +3062,10 @@ init (xlator_t *this)
dict_t *options = NULL;
char *includes = NULL, *excludes = NULL;
char *forced_loglevel = NULL;
+ eh_t *history = NULL;
+ int ret = -1;
+ size_t history_size = TRACE_DEFAULT_HISTORY_SIZE;
+ trace_conf_t *conf = NULL;
if (!this)
return -1;
@@ -2303,6 +3080,12 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
+ conf = GF_CALLOC (1, sizeof (trace_conf_t), gf_trace_mt_trace_conf_t);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot allocate "
+ "xl->private");
+ return -1;
+ }
options = this->options;
includes = data_to_str (dict_get (options, "include-ops"));
@@ -2311,8 +3094,13 @@ init (xlator_t *this)
{
int i;
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- trace_fop_names[i].name = (gf_fop_list[i] ?
- gf_fop_list[i] : ":O");
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
trace_fop_names[i].enabled = 1;
}
}
@@ -2320,14 +3108,43 @@ init (xlator_t *this)
if (includes && excludes) {
gf_log (this->name,
GF_LOG_ERROR,
- "must specify only one of 'include-ops' and 'exclude-ops'");
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
return -1;
}
+
if (includes)
process_call_list (includes, 1);
if (excludes)
process_call_list (excludes, 0);
+
+ GF_OPTION_INIT ("history-size", conf->history_size, size, out);
+
+ gf_log (this->name, GF_LOG_INFO, "history size %"GF_PRI_SIZET,
+ history_size);
+
+ GF_OPTION_INIT ("log-file", conf->log_file, bool, out);
+
+ gf_log (this->name, GF_LOG_INFO, "logging to file %s",
+ (conf->log_file == _gf_true)?"enabled":"disabled");
+
+ GF_OPTION_INIT ("log-history", conf->log_history, bool, out);
+
+ gf_log (this->name, GF_LOG_DEBUG, "logging to history %s",
+ (conf->log_history == _gf_true)?"enabled":"disabled");
+
+ history = eh_new (history_size, _gf_false, NULL);
+ if (!history) {
+ gf_log (this->name, GF_LOG_ERROR, "event history cannot be "
+ "initialized");
+ return -1;
+ }
+
+ this->history = history;
+
+ conf->trace_log_level = GF_LOG_INFO;
+
if (dict_get (options, "force-log-level")) {
forced_loglevel = data_to_str (dict_get (options,
"force-log-level"));
@@ -2335,25 +3152,34 @@ init (xlator_t *this)
goto setloglevel;
if (strcmp (forced_loglevel, "INFO") == 0)
- trace_log_level = GF_LOG_INFO;
+ conf->trace_log_level = GF_LOG_INFO;
else if (strcmp (forced_loglevel, "TRACE") == 0)
- trace_log_level = GF_LOG_TRACE;
+ conf->trace_log_level = GF_LOG_TRACE;
else if (strcmp (forced_loglevel, "ERROR") == 0)
- trace_log_level = GF_LOG_ERROR;
+ conf->trace_log_level = GF_LOG_ERROR;
else if (strcmp (forced_loglevel, "DEBUG") == 0)
- trace_log_level = GF_LOG_DEBUG;
+ conf->trace_log_level = GF_LOG_DEBUG;
else if (strcmp (forced_loglevel, "WARNING") == 0)
- trace_log_level = GF_LOG_WARNING;
+ conf->trace_log_level = GF_LOG_WARNING;
else if (strcmp (forced_loglevel, "CRITICAL") == 0)
- trace_log_level = GF_LOG_CRITICAL;
+ conf->trace_log_level = GF_LOG_CRITICAL;
else if (strcmp (forced_loglevel, "NONE") == 0)
- trace_log_level = GF_LOG_NONE;
+ conf->trace_log_level = GF_LOG_NONE;
}
setloglevel:
- gf_log_set_loglevel (trace_log_level);
+ gf_log_set_loglevel (conf->trace_log_level);
+ this->private = conf;
+ ret = 0;
+out:
+ if (ret == -1) {
+ if (history)
+ GF_FREE (history);
+ if (conf)
+ GF_FREE (conf);
+ }
- return 0;
+ return ret;
}
void
@@ -2362,6 +3188,9 @@ fini (xlator_t *this)
if (!this)
return;
+ if (this->history)
+ eh_destroy (this->history);
+
gf_log (this->name, GF_LOG_INFO,
"trace translator unloaded");
return;
@@ -2410,7 +3239,6 @@ struct xlator_fops fops = {
.fsetattr = trace_fsetattr,
};
-
struct xlator_cbks cbks = {
.release = trace_release,
.releasedir = trace_releasedir,
@@ -2426,5 +3254,21 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR
/*.value = { ""} */
},
+ { .key = {"history-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "1024",
+ },
+ { .key = {"log-file"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ { .key = {"log-history"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
{ .key = {NULL} },
};
+
+struct xlator_dumpops dumpops = {
+ .history = trace_dump_history
+};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
new file mode 100644
index 000000000..045eefb36
--- /dev/null
+++ b/xlators/debug/trace/src/trace.h
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <time.h>
+#include <errno.h>
+#include "glusterfs.h"
+#include "xlator.h"
+#include "common-utils.h"
+#include "event-history.h"
+#include "logging.h"
+#include "circ-buff.h"
+#include "statedump.h"
+#include "options.h"
+
+#define TRACE_DEFAULT_HISTORY_SIZE 1024
+
+typedef struct {
+ /* Since the longest fop name is fremovexattr i.e 12 characters, array size
+ * is kept 24, i.e double of the maximum.
+ */
+ char name[24];
+ int enabled;
+} trace_fop_name_t;
+
+trace_fop_name_t trace_fop_names[GF_FOP_MAXVALUE];
+
+typedef struct {
+ gf_boolean_t log_file;
+ gf_boolean_t log_history;
+ size_t history_size;
+ int trace_log_level;
+} trace_conf_t;
+
+#define TRACE_STACK_UNWIND(op, frame, params ...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ } while (0);
+
+#define LOG_ELEMENT(_conf, _string) \
+ do { \
+ if (_conf) { \
+ if ((_conf->log_history) == _gf_true) \
+ gf_log_eh ("%s", _string); \
+ if ((_conf->log_file) == _gf_true) \
+ gf_log (THIS->name, _conf->trace_log_level, \
+ "%s", _string); \
+ } \
+ } while (0);
+
+#define trace_stat_to_str(buf, statstr) \
+ do { \
+ char atime_buf[256] = {0,}; \
+ char mtime_buf[256] = {0,}; \
+ char ctime_buf[256] = {0,}; \
+ uint64_t ia_time = 0; \
+ \
+ if (!buf) \
+ break; \
+ \
+ ia_time = buf->ia_atime; \
+ strftime (atime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_mtime; \
+ strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_ctime; \
+ strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ snprintf (statstr, sizeof (statstr), \
+ "gfid=%s ino=%"PRIu64", mode=%o, " \
+ "nlink=%"GF_PRI_NLINK", uid=%u, " \
+ "gid=%u, size=%"PRIu64", " \
+ "blocks=%"PRIu64", atime=%s, " \
+ "mtime=%s, ctime=%s", \
+ uuid_utoa (buf->ia_gfid), buf->ia_ino, \
+ st_mode_from_ia (buf->ia_prot, \
+ buf->ia_type), \
+ buf->ia_nlink, buf->ia_uid, \
+ buf->ia_gid, buf->ia_size, \
+ buf->ia_blocks, atime_buf, \
+ mtime_buf, ctime_buf); \
+ } while (0);
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
index 2cbde680f..36efc6698 100644
--- a/xlators/encryption/Makefile.am
+++ b/xlators/encryption/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = rot-13
+SUBDIRS = rot-13 crypt
CLEANFILES =
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/encryption/crypt/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/xlators/cluster/unify/Makefile.am
+++ b/xlators/encryption/crypt/Makefile.am
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
new file mode 100644
index 000000000..faadd117f
--- /dev/null
+++ b/xlators/encryption/crypt/src/Makefile.am
@@ -0,0 +1,24 @@
+if ENABLE_CRYPT_XLATOR
+
+xlator_LTLIBRARIES = crypt.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
+
+crypt_la_LDFLAGS = -module -avoidversion -lssl -lcrypto
+
+crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
+crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+else
+
+noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+endif \ No newline at end of file
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
new file mode 100644
index 000000000..1ec41495c
--- /dev/null
+++ b/xlators/encryption/crypt/src/atom.c
@@ -0,0 +1,962 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Glossary
+ *
+ *
+ * cblock (or cipher block). A logical unit in a file.
+ * cblock size is defined as the number of bits
+ * in an input (or output) block of the block
+ * cipher (*). Cipher block size is a property of
+ * cipher algorithm. E.g. cblock size is 64 bits
+ * for DES, 128 bits for AES, etc.
+ *
+ * atomic cipher A cipher algorithm, which requires some chunks of
+ * algorithm text to be padded at left and(or) right sides before
+ * cipher transaform.
+ *
+ *
+ * block (atom) Minimal chunk of file's data, which doesn't require
+ * padding. We'll consider logical units in a file of
+ * block size (atom size).
+ *
+ * cipher algorithm Atomic cipher algorithm, which requires the last
+ * with EOF issue incomplete cblock in a file to be padded with some
+ * data (usually zeros).
+ *
+ *
+ * operation, which reading/writing from offset, which is not aligned to
+ * forms a gap at to atom size
+ * the beginning
+ *
+ *
+ * operation, which reading/writing count bytes starting from offset off,
+ * forms a gap at so that off+count is not aligned to atom_size
+ * the end
+ *
+ * head block the first atom affected by an operation, which forms
+ * a gap at the beginning, or(and) at the end.
+ * Сomment. Head block has at least one gap (either at
+ * the beginning, or at the end)
+ *
+ *
+ * tail block the last atom different from head, affected by an
+ * operation, which forms a gap at the end.
+ * Сomment: Tail block has exactly one gap (at the end).
+ *
+ *
+ * partial block head or tail block
+ *
+ *
+ * full block block without gaps.
+ *
+ *
+ * (*) Recommendation for Block Cipher Modes of Operation
+ * Methods and Techniques
+ * NIST Special Publication 800-38A Edition 2001
+ */
+
+/*
+ * atom->offset_at()
+ */
+static off_t offset_at_head(struct avec_config *conf)
+{
+ return conf->aligned_offset;
+}
+
+static off_t offset_at_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_head(get_hole_conf(frame));
+}
+
+static off_t offset_at_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_head(get_data_conf(frame));
+}
+
+
+static off_t offset_at_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ return conf->aligned_offset +
+ (conf->off_in_head ? get_atom_size(object) : 0) +
+ (conf->nr_full_blocks << get_atom_bits(object));
+}
+
+static off_t offset_at_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_tail(get_hole_conf(frame), object);
+}
+
+
+static off_t offset_at_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_tail(get_data_conf(frame), object);
+}
+
+static off_t offset_at_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ return conf->aligned_offset +
+ (conf->off_in_head ? get_atom_size(object) : 0);
+}
+
+static off_t offset_at_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_full(get_data_conf(frame), object);
+}
+
+static off_t offset_at_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_full(get_hole_conf(frame), object);
+}
+
+/*
+ * atom->io_size_nopad()
+ */
+
+static uint32_t io_size_nopad_head(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ uint32_t gap_at_beg;
+ uint32_t gap_at_end;
+
+ check_head_block(conf);
+
+ gap_at_beg = conf->off_in_head;
+
+ if (has_tail_block(conf) || has_full_blocks(conf) || conf->off_in_tail == 0 )
+ gap_at_end = 0;
+ else
+ gap_at_end = get_atom_size(object) - conf->off_in_tail;
+
+ return get_atom_size(object) - (gap_at_beg + gap_at_end);
+}
+
+static uint32_t io_size_nopad_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_tail_block(conf);
+ return conf->off_in_tail;
+}
+
+static uint32_t io_size_nopad_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_full_block(conf);
+ return get_atom_size(object);
+}
+
+static uint32_t io_size_nopad_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_head(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_head(get_hole_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_tail(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_tail(get_hole_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_full(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_full(get_hole_conf(frame), object);
+}
+
+static uint32_t offset_in_head(struct avec_config *conf)
+{
+ check_cursor_head(conf);
+
+ return conf->off_in_head;
+}
+
+static uint32_t offset_in_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return 0;
+}
+
+static uint32_t offset_in_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_cursor_full(conf);
+
+ if (has_head_block(conf))
+ return (conf->cursor - 1) << get_atom_bits(object);
+ else
+ return conf->cursor << get_atom_bits(object);
+}
+
+static uint32_t offset_in_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_head(get_data_conf(frame));
+}
+
+static uint32_t offset_in_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_head(get_hole_conf(frame));
+}
+
+static uint32_t offset_in_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_full(get_data_conf(frame), object);
+}
+
+static uint32_t offset_in_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_full(get_hole_conf(frame), object);
+}
+
+/*
+ * atom->rmw()
+ */
+/*
+ * Pre-conditions:
+ * @vec contains plain text of the latest
+ * version.
+ *
+ * Uptodate gaps of the @partial block with
+ * this plain text, encrypt the whole block
+ * and write the result to disk.
+ */
+static int32_t rmw_partial_block(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ struct rmw_atom *atom)
+{
+ size_t was_read = 0;
+ uint64_t file_size;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *partial = atom->get_iovec(frame, 0);
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_partial_block;
+#if DEBUG_CRYPT
+ gf_boolean_t check_last_cblock = _gf_false;
+#endif
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto exit;
+
+ file_size = local->cur_file_size;
+ was_read = op_ret;
+
+ if (atom->locality == HEAD_ATOM && conf->off_in_head) {
+ /*
+ * head atom with a non-uptodate gap
+ * at the beginning
+ *
+ * fill the gap with plain text of the
+ * latest version. Convert a part of hole
+ * (if any) to zeros.
+ */
+ int32_t i;
+ int32_t copied = 0;
+ int32_t to_gap; /* amount of data needed to uptodate
+ the gap at the beginning */
+#if 0
+ int32_t hole = 0; /* The part of the hole which
+ * got in the head block */
+#endif /* 0 */
+ to_gap = conf->off_in_head;
+
+ if (was_read < to_gap) {
+ if (file_size >
+ offset_at_head(conf) + was_read) {
+ /*
+ * It is impossible to uptodate
+ * head block: too few bytes have
+ * been read from disk, so that
+ * partial write is impossible.
+ *
+ * It could happen because of many
+ * reasons: IO errors, (meta)data
+ * corruption in the local file system,
+ * etc.
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Can not uptodate a gap at the beginning");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+#if 0
+ hole = to_gap - was_read;
+#endif /* 0 */
+ to_gap = was_read;
+ }
+ /*
+ * uptodate the gap at the beginning
+ */
+ for (i = 0; i < count && copied < to_gap; i++) {
+ int32_t to_copy;
+
+ to_copy = vec[i].iov_len;
+ if (to_copy > to_gap - copied)
+ to_copy = to_gap - copied;
+
+ memcpy(partial->iov_base, vec[i].iov_base, to_copy);
+ copied += to_copy;
+ }
+#if 0
+ /*
+ * If possible, convert part of the
+ * hole, which got in the head block
+ */
+ ret = TRY_LOCK(&local->hole_lock);
+ if (!ret) {
+ if (local->hole_handled)
+ /*
+ * already converted by
+ * crypt_writev_cbk()
+ */
+ UNLOCK(&local->hole_lock);
+ else {
+ /*
+ * convert the part of the hole
+ * which got in the head block
+ * to zeros.
+ *
+ * Update the orig_offset to make
+ * sure writev_cbk() won't care
+ * about this part of the hole.
+ *
+ */
+ memset(partial->iov_base + to_gap, 0, hole);
+
+ conf->orig_offset -= hole;
+ conf->orig_size += hole;
+ UNLOCK(&local->hole_lock);
+ }
+ }
+ else /*
+ * conversion is being performed
+ * by crypt_writev_cbk()
+ */
+ ;
+#endif /* 0 */
+ }
+ if (atom->locality == TAIL_ATOM ||
+ (!has_tail_block(conf) && conf->off_in_tail)) {
+ /*
+ * tail atom, or head atom with a non-uptodate
+ * gap at the end.
+ *
+ * fill the gap at the end of the block
+ * with plain text of the latest version.
+ * Pad the result, (if needed)
+ */
+ int32_t i;
+ int32_t to_gap;
+ int copied;
+ off_t off_in_tail;
+ int32_t to_copy;
+
+ off_in_tail = conf->off_in_tail;
+ to_gap = conf->gap_in_tail;
+
+ if (to_gap && was_read < off_in_tail + to_gap) {
+ /*
+ * It is impossible to uptodate
+ * the gap at the end: too few bytes
+ * have been read from disk, so that
+ * partial write is impossible.
+ *
+ * It could happen because of many
+ * reasons: IO errors, (meta)data
+ * corruption in the local file system,
+ * etc.
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Can not uptodate a gap at the end");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * uptodate the gap at the end
+ */
+ copied = 0;
+ to_copy = to_gap;
+ for(i = count - 1; i >= 0 && to_copy > 0; i--) {
+ uint32_t from_vec, off_in_vec;
+
+ off_in_vec = 0;
+ from_vec = vec[i].iov_len;
+ if (from_vec > to_copy) {
+ off_in_vec = from_vec - to_copy;
+ from_vec = to_copy;
+ }
+ memcpy(partial->iov_base +
+ off_in_tail + to_gap - copied - from_vec,
+ vec[i].iov_base + off_in_vec,
+ from_vec);
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
+ (int)from_vec,
+ (int)off_in_tail + to_gap - copied - from_vec,
+ (int)off_in_vec);
+
+ copied += from_vec;
+ to_copy -= from_vec;
+ }
+ partial->iov_len = off_in_tail + to_gap;
+
+ if (object_alg_should_pad(object)) {
+ int32_t resid = 0;
+ resid = partial->iov_len & (object_alg_blksize(object) - 1);
+ if (resid) {
+ /*
+ * append a new EOF padding
+ */
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "set padding size %d",
+ local->eof_padding_size);
+
+ memset(partial->iov_base + partial->iov_len,
+ 1,
+ local->eof_padding_size);
+ partial->iov_len += local->eof_padding_size;
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "pad cblock with %d zeros:",
+ local->eof_padding_size);
+ dump_cblock(this,
+ (unsigned char *)partial->iov_base +
+ partial->iov_len - object_alg_blksize(object));
+ check_last_cblock = _gf_true;
+#endif
+ }
+ }
+ }
+ /*
+ * encrypt the whole block
+ */
+ encrypt_aligned_iov(object,
+ partial,
+ 1,
+ atom->offset_at(frame, object));
+#if DEBUG_CRYPT
+ if (check_last_cblock == _gf_true) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "encrypt last cblock with offset %llu",
+ (unsigned long long)atom->offset_at(frame, object));
+ dump_cblock(this, (unsigned char *)partial->iov_base +
+ partial->iov_len - object_alg_blksize(object));
+ }
+#endif
+ set_local_io_params_writev(frame, object, atom,
+ atom->offset_at(frame, object),
+ iovec_get_size(partial, 1));
+ /*
+ * write the whole block to disk
+ */
+ end_writeback_partial_block = dispatch_end_writeback(local->fop);
+ conf->cursor ++;
+ STACK_WIND(frame,
+ end_writeback_partial_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ partial,
+ 1,
+ atom->offset_at(frame, object),
+ local->flags,
+ local->iobref_data,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "submit partial block: %d bytes from %d offset",
+ (int)iovec_get_size(partial, 1),
+ (int)atom->offset_at(frame, object));
+ exit:
+ return 0;
+}
+
+/*
+ * Perform a (read-)modify-write sequence.
+ * This should be performed only after approval
+ * of upper server-side manager, i.e. the caller
+ * needs to make sure this is his turn to rmw.
+ */
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype)
+{
+ int32_t ret;
+ dict_t *dict;
+ struct rmw_atom *atom;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ atom = atom_by_types(local->active_setup, ltype);
+ /*
+ * To perform the "read" component of the read-modify-write
+ * sequence the crypt translator does stack_wind to itself.
+ *
+ * Pass current file size to crypt_readv()
+ */
+ dict = dict_new();
+ if (!dict) {
+ /*
+ * FIXME: Handle the error
+ */
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ return;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ /*
+ * FIXME: Handle the error
+ */
+ dict_unref(dict);
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ atom->rmw,
+ this,
+ this->fops->readv, /* crypt_readv */
+ fd,
+ atom->count_to_uptodate(frame, object), /* count */
+ atom->offset_at(frame, object), /* offset to read from */
+ 0,
+ dict);
+ exit:
+ dict_unref(dict);
+}
+
+/*
+ * submit blocks of FULL_ATOM type
+ */
+void submit_full(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
+ uint32_t count; /* total number of full blocks to submit */
+ uint32_t granularity; /* number of blocks to submit in one iteration */
+
+ uint64_t off_in_file; /* start offset in the file, bytes */
+ uint32_t off_in_atom; /* start offset in the atom, blocks */
+ uint32_t blocks_written = 0; /* blocks written for this submit */
+
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_full_block;
+ /*
+ * Write full blocks by groups of granularity size.
+ */
+ end_writeback_full_block = dispatch_end_writeback(local->fop);
+
+ if (is_ordered_mode(frame)) {
+ uint32_t skip = has_head_block(conf) ? 1 : 0;
+ count = 1;
+ granularity = 1;
+ /*
+ * calculate start offset using cursor value;
+ * here we should take into accout head block,
+ * which corresponds to cursor value 0.
+ */
+ off_in_file = atom->offset_at(frame, object) +
+ ((conf->cursor - skip) << get_atom_bits(object));
+ off_in_atom = conf->cursor - skip;
+ }
+ else {
+ /*
+ * in parallel mode
+ */
+ count = conf->nr_full_blocks;
+ granularity = MAX_IOVEC;
+ off_in_file = atom->offset_at(frame, object);
+ off_in_atom = 0;
+ }
+ while (count) {
+ uint32_t blocks_to_write = count;
+
+ if (blocks_to_write > granularity)
+ blocks_to_write = granularity;
+ if (conf->type == HOLE_ATOM)
+ /*
+ * reset iovec before encryption
+ */
+ memset(atom->get_iovec(frame, 0)->iov_base,
+ 0,
+ get_atom_size(object));
+ /*
+ * encrypt the group
+ */
+ encrypt_aligned_iov(object,
+ atom->get_iovec(frame,
+ off_in_atom +
+ blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written <<
+ get_atom_bits(object)));
+
+ set_local_io_params_writev(frame, object, atom,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ blocks_to_write << get_atom_bits(object));
+
+ conf->cursor += blocks_to_write;
+
+ STACK_WIND(frame,
+ end_writeback_full_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ atom->get_iovec(frame, off_in_atom + blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ local->flags,
+ local->iobref_data ? local->iobref_data : local->iobref,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
+ blocks_to_write,
+ (int)(off_in_file + (blocks_written << get_atom_bits(object))));
+
+ count -= blocks_to_write;
+ blocks_written += blocks_to_write;
+ }
+ return;
+}
+
+static int32_t rmw_data_head(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(DATA_ATOM, HEAD_ATOM));
+}
+
+static int32_t rmw_data_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(DATA_ATOM, TAIL_ATOM));
+}
+
+static int32_t rmw_hole_head(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(HOLE_ATOM, HEAD_ATOM));
+}
+
+static int32_t rmw_hole_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(HOLE_ATOM, TAIL_ATOM));
+}
+
+/*
+ * atom->count_to_uptodate()
+ */
+static uint32_t count_to_uptodate_head(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ if (conf->acount == 1 && conf->off_in_tail)
+ return get_atom_size(object);
+ else
+ /* there is no need to read the whole head block */
+ return conf->off_in_head;
+}
+
+static uint32_t count_to_uptodate_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ /* we need to read the whole tail block */
+ return get_atom_size(object);
+}
+
+static uint32_t count_to_uptodate_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_head(get_data_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_tail(get_data_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_head(get_hole_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_tail(get_hole_conf(frame), object);
+}
+
+/* atom->get_config() */
+
+static struct avec_config *get_config_data(call_frame_t *frame)
+{
+ return &((crypt_local_t *)frame->local)->data_conf;
+}
+
+static struct avec_config *get_config_hole(call_frame_t *frame)
+{
+ return &((crypt_local_t *)frame->local)->hole_conf;
+}
+
+/*
+ * atom->get_iovec()
+ */
+static struct iovec *get_iovec_hole_head(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec;
+}
+
+static struct iovec *get_iovec_hole_full(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec + (conf->off_in_head ? 1 : 0);
+}
+
+static inline struct iovec *get_iovec_hole_tail(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec + (conf->blocks_in_pool - 1);
+}
+
+static inline struct iovec *get_iovec_data_head(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec;
+}
+
+static inline struct iovec *get_iovec_data_full(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec + (conf->off_in_head ? 1 : 0) + count;
+}
+
+static inline struct iovec *get_iovec_data_tail(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec +
+ (conf->off_in_head ? 1 : 0) +
+ conf->nr_full_blocks;
+}
+
+static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
+ [DATA_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_data_head,
+ .offset_at = offset_at_data_head,
+ .offset_in = offset_in_data_head,
+ .get_iovec = get_iovec_data_head,
+ .io_size_nopad = io_size_nopad_data_head,
+ .count_to_uptodate = count_to_uptodate_data_head,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_data_tail,
+ .offset_at = offset_at_data_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_data_tail,
+ .io_size_nopad = io_size_nopad_data_tail,
+ .count_to_uptodate = count_to_uptodate_data_tail,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_data_full,
+ .offset_in = offset_in_data_full,
+ .get_iovec = get_iovec_data_full,
+ .io_size_nopad = io_size_nopad_data_full,
+ .get_config = get_config_data
+ },
+ [HOLE_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_hole_head,
+ .offset_at = offset_at_hole_head,
+ .offset_in = offset_in_hole_head,
+ .get_iovec = get_iovec_hole_head,
+ .io_size_nopad = io_size_nopad_hole_head,
+ .count_to_uptodate = count_to_uptodate_hole_head,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_hole_tail,
+ .offset_at = offset_at_hole_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_hole_tail,
+ .io_size_nopad = io_size_nopad_hole_tail,
+ .count_to_uptodate = count_to_uptodate_hole_tail,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_hole_full,
+ .offset_in = offset_in_hole_full,
+ .get_iovec = get_iovec_hole_full,
+ .io_size_nopad = io_size_nopad_hole_full,
+ .get_config = get_config_hole
+ }
+};
+
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality)
+{
+ return &atoms[data][locality];
+}
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
new file mode 100644
index 000000000..7c212ad5d
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-common.h
@@ -0,0 +1,141 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_COMMON_H__
+#define __CRYPT_COMMON_H__
+
+#define INVAL_SUBVERSION_NUMBER (0xff)
+#define CRYPT_INVAL_OP (GF_FOP_NULL)
+
+#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
+#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
+#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
+#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
+#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
+#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
+#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
+
+
+/* messages for crypt_open() */
+#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
+#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
+
+#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
+
+#define noop do {; } while (0)
+#define cassert(cond) ({ switch (-1) { case (cond): case 0: break; } })
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+/*
+ * Format of file's metadata
+ */
+struct crypt_format {
+ uint8_t loader_id; /* version of metadata loader */
+ uint8_t versioned[0]; /* file's metadata of specific version */
+} __attribute__((packed));
+
+typedef enum {
+ AES_CIPHER_ALG,
+ LAST_CIPHER_ALG
+} cipher_alg_t;
+
+typedef enum {
+ XTS_CIPHER_MODE,
+ LAST_CIPHER_MODE
+} cipher_mode_t;
+
+typedef enum {
+ MTD_LOADER_V1,
+ LAST_MTD_LOADER
+} mtd_loader_id;
+
+static inline void msgflags_set_mtd_rlock(uint32_t *flags)
+{
+ *flags |= MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+static inline void msgflags_set_mtd_wlock(uint32_t *flags)
+{
+ *flags |= MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_rlock(uint32_t *flags)
+{
+ return *flags & MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_wlock(uint32_t *flags)
+{
+ return *flags & MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_lock(uint32_t *flags)
+{
+ return msgflags_check_mtd_rlock(flags) ||
+ msgflags_check_mtd_wlock(flags);
+}
+
+/*
+ * returns number of logical blocks occupied
+ * (maybe partially) by @count bytes
+ * at offset @start.
+ */
+static inline off_t logical_blocks_occupied(uint64_t start, off_t count,
+ int blkbits)
+{
+ return ((start + count - 1) >> blkbits) - (start >> blkbits) + 1;
+}
+
+/*
+ * are two bytes (represented by offsets @off1
+ * and @off2 respectively) in the same logical
+ * block.
+ */
+static inline int in_same_lblock(uint64_t off1, uint64_t off2,
+ int blkbits)
+{
+ return off1 >> blkbits == off2 >> blkbits;
+}
+
+static inline void dump_cblock(xlator_t *this, unsigned char *buf)
+{
+ gf_log(this->name, GF_LOG_DEBUG,
+ "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
+ (buf)[0],
+ (buf)[1],
+ (buf)[2],
+ (buf)[3],
+ (buf)[4],
+ (buf)[5],
+ (buf)[6],
+ (buf)[7],
+ (buf)[8],
+ (buf)[9],
+ (buf)[10],
+ (buf)[11],
+ (buf)[12],
+ (buf)[13],
+ (buf)[14],
+ (buf)[15]);
+}
+
+#endif /* __CRYPT_COMMON_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
new file mode 100644
index 000000000..799727573
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-mem-types.h
@@ -0,0 +1,43 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __CRYPT_MEM_TYPES_H__
+#define __CRYPT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_crypt_mem_types_ {
+ gf_crypt_mt_priv = gf_common_mt_end + 1,
+ gf_crypt_mt_inode,
+ gf_crypt_mt_data,
+ gf_crypt_mt_mtd,
+ gf_crypt_mt_loc,
+ gf_crypt_mt_iatt,
+ gf_crypt_mt_key,
+ gf_crypt_mt_iovec,
+ gf_crypt_mt_char,
+};
+
+#endif /* __CRYPT_MEM_TYPES_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
+
+
+
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
new file mode 100644
index 000000000..db2e6d83c
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.c
@@ -0,0 +1,4498 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <ctype.h>
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "crypt-common.h"
+#include "crypt.h"
+
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+static int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t from, off_t size);
+static int32_t load_file_size(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata);
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void put_one_call_open(call_frame_t *frame);
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this);
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this);
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
+static void free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
+static void free_avec_data(crypt_local_t *local);
+static void free_avec_hole(crypt_local_t *local);
+
+static crypt_local_t *crypt_alloc_local(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop)
+{
+ crypt_local_t *local = NULL;
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ return NULL;
+ }
+ local->fop = fop;
+ LOCK_INIT(&local->hole_lock);
+ LOCK_INIT(&local->call_lock);
+ LOCK_INIT(&local->rw_count_lock);
+
+ frame->local = local;
+ return local;
+}
+
+struct crypt_inode_info *get_crypt_inode_info(inode_t *inode, xlator_t *this)
+{
+ int ret;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+
+ ret = inode_ctx_get(inode, this, &value);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Can not get inode info");
+ return NULL;
+ }
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Can not obtain inode info");
+ return NULL;
+ }
+ return info;
+}
+
+static struct crypt_inode_info *local_get_inode_info(crypt_local_t *local,
+ xlator_t *this)
+{
+ if (local->info)
+ return local->info;
+ local->info = get_crypt_inode_info(local->fd->inode, this);
+ return local->info;
+}
+
+static struct crypt_inode_info *alloc_inode_info(crypt_local_t *local,
+ loc_t *loc)
+{
+ struct crypt_inode_info *info;
+
+ info = GF_CALLOC(1, sizeof(*info), gf_crypt_mt_inode);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_WARNING,
+ "Can not allocate inode info");
+ return NULL;
+ }
+ memset(info, 0, sizeof(*info));
+#if DEBUG_CRYPT
+ info->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!info->loc) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
+ GF_FREE(info);
+ return NULL;
+ }
+ if (loc_copy(info->loc, loc)){
+ GF_FREE(info->loc);
+ GF_FREE(info);
+ return NULL;
+ }
+#endif /* DEBUG_CRYPT */
+
+ local->info = info;
+ return info;
+}
+
+static void free_inode_info(struct crypt_inode_info *info)
+{
+#if DEBUG_CRYPT
+ loc_wipe(info->loc);
+ GF_FREE(info->loc);
+#endif
+ memset(info, 0, sizeof(*info));
+ GF_FREE(info);
+}
+
+int crypt_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ if (!inode_ctx_del (inode, this, &ctx_addr))
+ free_inode_info((struct crypt_inode_info *)(long)ctx_addr);
+ return 0;
+}
+
+#if DEBUG_CRYPT
+static void check_read(call_frame_t *frame, xlator_t *this, int32_t read,
+ struct iovec *vec, int32_t count, struct iatt *stbuf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = get_object_cinfo(local->info);
+ struct avec_config *conf = &local->data_conf;
+ uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
+
+ if (read <= 0)
+ return;
+ if (read != iovec_get_size(vec, count))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "op_ret differs from amount of read bytes");
+
+ if (object_alg_should_pad(object) && (read & (object_alg_blksize(object) - 1)))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (!= 0 mod(cblock size))");
+
+ if (conf->aligned_offset + read >
+ stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (too large))");
+
+}
+
+#define PT_BYTES_TO_DUMP (32)
+static void dump_plain_text(crypt_local_t *local, struct iovec *avec)
+{
+ int32_t to_dump;
+ char str[PT_BYTES_TO_DUMP + 1];
+
+ if (!avec)
+ return;
+ to_dump = avec->iov_len;
+ if (to_dump > PT_BYTES_TO_DUMP)
+ to_dump = PT_BYTES_TO_DUMP;
+ memcpy(str, avec->iov_base, to_dump);
+ memset(str + to_dump, '0', 1);
+ gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
+}
+
+static int32_t data_conf_invariant(struct avec_config *conf)
+{
+ return conf->acount ==
+ !!has_head_block(conf) +
+ !!has_tail_block(conf)+
+ conf->nr_full_blocks;
+}
+
+static int32_t hole_conf_invariant(struct avec_config *conf)
+{
+ return conf->blocks_in_pool ==
+ !!has_head_block(conf) +
+ !!has_tail_block(conf)+
+ !!has_full_blocks(conf);
+}
+
+static void crypt_check_conf(struct avec_config *conf)
+{
+ int32_t ret = 0;
+ const char *msg;
+
+ switch (conf->type) {
+ case DATA_ATOM:
+ msg = "data";
+ ret = data_conf_invariant(conf);
+ break;
+ case HOLE_ATOM:
+ msg = "hole";
+ ret = hole_conf_invariant(conf);
+ break;
+ default:
+ msg = "unknown";
+ }
+ if (!ret)
+ gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg);
+}
+
+static void check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ uint64_t local_file_size;
+
+ switch(local->fop) {
+ case GF_FOP_FTRUNCATE:
+ return;
+ case GF_FOP_WRITE:
+ local_file_size = local->new_file_size;
+ break;
+ case GF_FOP_READ:
+ if (parent_is_crypt_xlator(frame, this))
+ return;
+ local_file_size = local->cur_file_size;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
+ return;
+ }
+ if (buf->ia_size != round_up(local_file_size,
+ object_alg_blksize(object)))
+ gf_log("crypt", GF_LOG_DEBUG,
+ "bad ia_size in buf (%llu), should be %llu",
+ (unsigned long long)buf->ia_size,
+ (unsigned long long)round_up(local_file_size,
+ object_alg_blksize(object)));
+}
+
+#else
+#define check_read(frame, this, op_ret, vec, count, stbuf) noop
+#define dump_plain_text(local, avec) noop
+#define crypt_check_conf(conf) noop
+#define check_buf(frame, this, buf) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Pre-conditions:
+ * @vec represents a ciphertext of expanded size and
+ * aligned offset.
+ *
+ * Compound a temporal vector @avec with block-aligned
+ * components, decrypt and fix it up to represent a chunk
+ * of data corresponding to the original size and offset.
+ * Pass the result to the next translator.
+ */
+int32_t crypt_readv_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *avec;
+ uint32_t i;
+ uint32_t to_vec;
+ uint32_t to_user;
+
+ check_buf(frame, this, stbuf);
+ check_read(frame, this, op_ret, vec, count, stbuf);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->iobref = iobref_ref(iobref);
+
+ local->buf = *stbuf;
+ local->buf.ia_size = local->cur_file_size;
+
+ if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
+ goto put_one_call;
+
+ if (conf->orig_offset >= local->cur_file_size) {
+ local->op_ret = 0;
+ goto put_one_call;
+ }
+ /*
+ * correct config params with real file size
+ * and actual amount of bytes read
+ */
+ set_config_offsets(frame, this,
+ conf->orig_offset, op_ret, DATA_ATOM, 0);
+
+ if (conf->orig_offset + conf->orig_size > local->cur_file_size)
+ conf->orig_size = local->cur_file_size - conf->orig_offset;
+ /*
+ * calculate amount of data to be returned
+ * to user.
+ */
+ to_user = op_ret;
+ if (conf->aligned_offset + to_user <= conf->orig_offset) {
+ gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ to_user -= (conf->aligned_offset - conf->orig_offset);
+
+ if (to_user > conf->orig_size)
+ to_user = conf->orig_size;
+ local->rw_count = to_user;
+
+ op_errno = set_config_avec_data(this, local,
+ conf, object, vec, count);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto put_one_call;
+ }
+ avec = conf->avec;
+#if DEBUG_CRYPT
+ if (conf->off_in_tail != 0 &&
+ conf->off_in_tail < object_alg_blksize(object) &&
+ object_alg_should_pad(object))
+ gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
+ conf->off_in_tail);
+ if (iovec_get_size(vec, count) != 0 &&
+ in_same_lblock(conf->orig_offset + iovec_get_size(vec, count) - 1,
+ local->cur_file_size - 1,
+ object_alg_blkbits(object))) {
+ gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
+ dump_cblock(this,
+ (unsigned char *)(avec[conf->acount - 1].iov_base) +
+ avec[conf->acount - 1].iov_len - object_alg_blksize(object));
+ dump_cblock(this,
+ (unsigned char *)(vec[count - 1].iov_base) +
+ vec[count - 1].iov_len - object_alg_blksize(object));
+ }
+#endif
+ decrypt_aligned_iov(object, avec,
+ conf->acount, conf->aligned_offset);
+ /*
+ * pass proper plain data to user
+ */
+ avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
+ avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
+
+ to_vec = to_user;
+ for (i = 0; i < conf->acount; i++) {
+ if (avec[i].iov_len > to_vec)
+ avec[i].iov_len = to_vec;
+ to_vec -= avec[i].iov_len;
+ }
+ put_one_call:
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+static int32_t do_readv(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->cur_file_size = data_to_uint64(data);
+
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ local->flags,
+ local->xdata);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call(frame);
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+static int32_t crypt_readv_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size
+ */
+ STACK_WIND(frame,
+ do_readv,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ fd_unref(local->fd);
+ if (local->xdata)
+ dict_unref(local->xdata);
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ op_errno,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t readv_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "stat failed (%d)", op_errno);
+ goto error;
+ }
+ local->buf = *buf;
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno,
+ NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t crypt_readv(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset,
+ uint32_t flags, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
+ (int)size, (long long)offset);
+ if (parent_is_crypt_xlator(frame, this))
+ gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_READ);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ if (size == 0)
+ goto trivial;
+
+ local->fd = fd_ref(fd);
+ local->flags = flags;
+
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ fd_unref(fd);
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ fd_unref(fd);
+ goto error;
+ }
+ set_config_offsets(frame, this, offset, size,
+ DATA_ATOM, 0);
+ if (parent_is_crypt_xlator(frame, this)) {
+ data_t *data;
+ /*
+ * We are called by crypt_writev (or cypt_ftruncate)
+ * to perform the "read" component of the read-modify-write
+ * (or read-prune-write) sequence for some atom;
+ *
+ * don't ask for access:
+ * it has already been acquired
+ *
+ * Retrieve current file size
+ */
+ if (!xdata) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size hasn't been passed");
+ ret = EIO;
+ goto error;
+ }
+ data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size not found");
+ ret = EIO;
+ goto error;
+ }
+ local->old_file_size =
+ local->cur_file_size = data_to_uint64(data);
+
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ flags,
+ NULL);
+ return 0;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_readv_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ readv_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ ret,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom,
+ off_t io_offset,
+ uint32_t io_size)
+{
+ crypt_local_t *local = frame->local;
+
+ local->io_offset = io_offset;
+ local->io_size = io_size;
+
+ local->io_offset_nopad =
+ atom->offset_at(frame, object) + atom->offset_in(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad offset to %llu",
+ (unsigned long long)local->io_offset_nopad);
+
+ local->io_size_nopad = atom->io_size_nopad(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad size to %llu",
+ (unsigned long long)local->io_size_nopad);
+
+ local->update_disk_file_size = 0;
+ /*
+ * NOTE: eof_padding_size is 0 for all full atoms;
+ * For head and tail atoms it will be set up at rmw_partial block()
+ */
+ local->new_file_size = local->cur_file_size;
+
+ if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) {
+
+ local->new_file_size = local->io_offset_nopad + local->io_size_nopad;
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set new file size to %llu",
+ (unsigned long long)local->new_file_size);
+
+ local->update_disk_file_size = 1;
+ }
+}
+
+void set_local_io_params_ftruncate(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ uint32_t resid;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ resid = conf->orig_offset & (object_alg_blksize(object) - 1);
+ if (resid) {
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+ local->new_file_size = conf->aligned_offset;
+ local->update_disk_file_size = 0;
+ /*
+ * file size will be updated
+ * in the ->writev() stack,
+ * when submitting file tail
+ */
+ }
+ else {
+ local->eof_padding_size = 0;
+ local->new_file_size = conf->orig_offset;
+ local->update_disk_file_size = 1;
+ /*
+ * file size will be updated
+ * in this ->ftruncate stack
+ */
+ }
+}
+
+static inline void submit_head(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ submit_partial(frame, this, local->fd, HEAD_ATOM);
+}
+
+static inline void submit_tail(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ submit_partial(frame, this, local->fd, TAIL_ATOM);
+}
+
+static void submit_hole(call_frame_t *frame, xlator_t *this)
+{
+ /*
+ * hole conversion always means
+ * appended write and goes in ordered fashion
+ */
+ do_ordered_submit(frame, this, HOLE_ATOM);
+}
+
+static void submit_data(call_frame_t *frame, xlator_t *this)
+{
+ if (is_ordered_mode(frame)) {
+ do_ordered_submit(frame, this, DATA_ATOM);
+ return;
+ }
+ gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
+ get_nr_calls(frame, nr_calls_data(frame));
+ do_parallel_submit(frame, this, DATA_ATOM);
+ return;
+}
+
+/*
+ * heplers called by writev_cbk, fruncate_cbk in ordered mode
+ */
+
+static inline int32_t should_submit_hole(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+
+ return conf->avec != NULL;
+}
+
+static inline int32_t should_resume_submit_hole(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+
+ if (local->fop == GF_FOP_WRITE && has_tail_block(conf))
+ /*
+ * Don't submit a part of hole, which
+ * fits into a data block:
+ * this part of hole will be converted
+ * as a gap filled by zeros in data head
+ * block.
+ */
+ return conf->cursor < conf->acount - 1;
+ else
+ return conf->cursor < conf->acount;
+}
+
+static inline int32_t should_resume_submit_data(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ if (is_ordered_mode(frame))
+ return conf->cursor < conf->acount;
+ /*
+ * parallel writes
+ */
+ return 0;
+}
+
+static inline int32_t should_submit_data_after_hole(crypt_local_t *local)
+{
+ return local->data_conf.avec != NULL;
+}
+
+static void update_local_file_params(call_frame_t *frame,
+ xlator_t *this,
+ struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ crypt_local_t *local = frame->local;
+
+ check_buf(frame, this, postbuf);
+
+ local->prebuf = *prebuf;
+ local->postbuf = *postbuf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->new_file_size;
+
+ local->cur_file_size = local->new_file_size;
+}
+
+static int32_t end_writeback_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret <= 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "writev iteration failed");
+ goto put_one_call;
+ }
+ /*
+ * op_ret includes paddings (atom's head, atom's tail and EOF)
+ */
+ if (op_ret < local->io_size) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Incomplete writev iteration");
+ goto put_one_call;
+ }
+ op_ret -= local->eof_padding_size;
+ local->op_ret = op_ret;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local)) {
+
+ LOCK(&local->rw_count_lock);
+ local->rw_count += op_ret;
+ UNLOCK(&local->rw_count_lock);
+
+ if (should_resume_submit_data(frame))
+ submit_data(frame, this);
+ }
+ else {
+ /*
+ * hole conversion is going on;
+ * don't take into account written zeros
+ */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+
+ else if (should_submit_data_after_hole(local))
+ submit_data(frame, this);
+ }
+ put_one_call:
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+#define crypt_writev_cbk end_writeback_writev
+
+#define HOLE_WRITE_CHUNK_BITS 12
+#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
+
+/*
+ * Convert hole of size @size at offset @off to
+ * zeros and prepare respective iovecs for submit.
+ * The hole lock should be held.
+ *
+ * Pre-conditions:
+ * @local->file_size is set and valid.
+ */
+int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t off, off_t size)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
+
+ ret = set_config_avec_hole(this, local,
+ &local->hole_conf, object, local->fop);
+ crypt_check_conf(&local->hole_conf);
+
+ return ret;
+}
+
+/*
+ * prepare for submit @count bytes at offset @from
+ */
+int32_t prepare_for_submit_data(call_frame_t *frame, xlator_t *this,
+ off_t from, int32_t size, struct iovec *vec,
+ int32_t vec_count, int32_t setup_gap)
+{
+ uint32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, from, size,
+ DATA_ATOM, setup_gap);
+
+ ret = set_config_avec_data(this, local,
+ &local->data_conf, object, vec, vec_count);
+ crypt_check_conf(&local->data_conf);
+
+ return ret;
+}
+
+static void free_avec(struct iovec *avec,
+ char **pool, int blocks_in_pool)
+{
+ if (!avec)
+ return;
+ GF_FREE(pool);
+ GF_FREE(avec);
+}
+
+static void free_avec_data(crypt_local_t *local)
+{
+ return free_avec(local->data_conf.avec,
+ local->data_conf.pool,
+ local->data_conf.blocks_in_pool);
+}
+
+static void free_avec_hole(crypt_local_t *local)
+{
+ return free_avec(local->hole_conf.avec,
+ local->hole_conf.pool,
+ local->hole_conf.blocks_in_pool);
+}
+
+
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf;
+
+ local->active_setup = dtype;
+ conf = conf_by_type(frame, dtype);
+
+ if (has_head_block(conf))
+ submit_head(frame, this);
+
+ if (has_full_blocks(conf))
+ submit_full(frame, this);
+
+ if (has_tail_block(conf))
+ submit_tail(frame, this);
+ return;
+}
+
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf;
+
+ local->active_setup = dtype;
+ conf = conf_by_type(frame, dtype);
+
+ if (should_submit_head_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_head(frame, this);
+ }
+ else if (should_submit_full_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_full(frame, this);
+ }
+ else if (should_submit_tail_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_tail(frame, this);
+ }
+ else
+ gf_log("crypt", GF_LOG_DEBUG,
+ "nothing has been submitted in ordered mode");
+ return;
+}
+
+static int32_t do_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_ret = -1;
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
+
+ if (local->cur_file_size < local->data_conf.orig_offset) {
+ /*
+ * Set up hole config
+ */
+ op_errno = prepare_for_submit_hole(frame,
+ this,
+ local->cur_file_size,
+ local->data_conf.orig_offset - local->cur_file_size);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto error;
+ }
+ }
+ if (should_submit_hole(local))
+ submit_hole(frame, this);
+ else
+ submit_data(frame, this);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+static int32_t crypt_writev_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_writev,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+static int32_t writev_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *dict)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->prebuf = *buf;
+ local->postbuf = *buf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->cur_file_size;
+
+ get_one_call(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+int crypt_writev(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vec,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+#if DEBUG_CRYPT
+ gf_log ("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
+ (int)iovec_get_size(vec, count), (long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ if (iobref)
+ local->iobref = iobref_ref(iobref);
+ /*
+ * to update real file size on the server
+ */
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->flags = flags;
+
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (iovec_get_size(vec, count) == 0)
+ goto trivial;
+
+ ret = prepare_for_submit_data(frame, this, offset,
+ iovec_get_size(vec, count),
+ vec, count, 0 /* don't setup gup
+ in tail: we don't
+ know file size yet */);
+ if (ret)
+ goto error;
+
+ if (parent_is_crypt_xlator(frame, this)) {
+ data_t *data;
+ /*
+ * we are called by shinking crypt_ftruncate(),
+ * which doesn't perform hole conversion;
+ *
+ * don't ask for access:
+ * it has already been acquired
+ */
+
+ /*
+ * extract file size
+ */
+ if (!xdata) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size hasn't been passed");
+ ret = EIO;
+ goto error;
+ }
+ data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size not found");
+ ret = EIO;
+ goto error;
+ }
+ local->old_file_size =
+ local->cur_file_size = data_to_uint64(data);
+
+ submit_data(frame, this);
+ return 0;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ /*
+ * lock the file and retrieve its size
+ */
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_writev_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ writev_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->iobref)
+ iobref_unref(iobref);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(writev, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ set_config_offsets(frame, this,
+ offset,
+ 0, /* count */
+ DATA_ATOM,
+ 0 /* since we prune, there is no
+ gap in tail to uptodate */);
+ return 0;
+}
+
+/*
+ * Finish the read-prune-modify sequence
+ *
+ * Can be invoked as
+ * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
+ * 2) ->writev_cbk() for non-cblock-aligned prune
+ */
+
+static int32_t prune_complete(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * This is called as ->ftruncate_cbk()
+ *
+ * Perform the "write" component of the
+ * read-prune-write sequence.
+ *
+ * submuit the rest of the file
+ */
+static int32_t prune_submit_file_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ dict_t *dict;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+ local->new_file_size = conf->orig_offset;
+
+ /*
+ * The rest of the file is a partial block and, hence,
+ * should be written via RMW sequence, so the crypt xlator
+ * does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_writev()
+ */
+ op_errno = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (op_errno) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ dict_unref(dict);
+ goto error;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "passing current file size (%llu) to crypt_writev",
+ (unsigned long long)local->cur_file_size);
+ /*
+ * Padding will be filled with
+ * zeros by rmw_partial_block()
+ */
+ STACK_WIND(frame,
+ prune_complete,
+ this,
+ this->fops->writev, /* crypt_writev */
+ local->fd,
+ &local->vec,
+ 1,
+ conf->aligned_offset, /* offset to write from */
+ 0,
+ local->iobref,
+ dict);
+
+ dict_unref(dict);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * This is called as a callback of ->writev() invoked in behalf
+ * of ftruncate(): it can be
+ * 1) ordered writes issued by hole conversion in the case of
+ * expanded truncate, or
+ * 2) an rmw partial data block issued by non-cblock-aligned
+ * prune.
+ */
+int32_t end_writeback_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /*
+ * if nothing has been written,
+ * then it must be an error
+ */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local))
+ /* case (2) */
+ goto put_one_call;
+ /* case (1) */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+ /*
+ * case of hole, when we should't resume
+ */
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform prune and write components of the
+ * read-prune-write sequence.
+ *
+ * Called as ->readv_cbk()
+ *
+ * Pre-conditions:
+ * @vec contains the latest atom of the file
+ * (plain text)
+ */
+static int32_t prune_write(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t i;
+ size_t to_copy;
+ size_t copied = 0;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ if (op_ret == -1)
+ goto put_one_call;
+
+ /*
+ * At first, uptodate head block
+ */
+ if (iovec_get_size(vec, count) < conf->off_in_head) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to uptodate head block for prune");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ local->vec.iov_len = conf->off_in_head;
+ local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len,
+ gf_crypt_mt_data);
+
+ if (local->vec.iov_base == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ }
+ for (i = 0; i < count; i++) {
+ to_copy = vec[i].iov_len;
+ if (to_copy > local->vec.iov_len - copied)
+ to_copy = local->vec.iov_len - copied;
+
+ memcpy((char *)local->vec.iov_base + copied,
+ vec[i].iov_base,
+ to_copy);
+ copied += to_copy;
+ if (copied == local->vec.iov_len)
+ break;
+ }
+ /*
+ * perform prune with aligned offset
+ * (i.e. at this step we prune a bit
+ * more then it is needed
+ */
+ STACK_WIND(frame,
+ prune_submit_file_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->aligned_offset,
+ local->xdata);
+ return 0;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform a read-prune-write sequence
+ */
+int32_t read_prune_write(call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_local_io_params_ftruncate(frame, object);
+ get_one_call_nolock(frame);
+
+ if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
+ /*
+ * cblock-aligned prune:
+ * we don't need read and write components,
+ * just cut file body
+ */
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune without RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+
+ STACK_WIND(frame,
+ prune_complete,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->orig_offset,
+ local->xdata);
+ return 0;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune with RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+ /*
+ * We are about to perform the "read" component of the
+ * read-prune-write sequence. It means that we need to
+ * read encrypted data from disk and decrypt it.
+ * So, the crypt translator does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_readv()
+
+ */
+ dict = dict_new();
+ if (!dict) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ ret = ENOMEM;
+ goto exit;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ prune_write,
+ this,
+ this->fops->readv, /* crypt_readv */
+ local->fd,
+ get_atom_size(object), /* bytes to read */
+ conf->aligned_offset, /* offset to read from */
+ 0,
+ dict);
+ exit:
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*
+ * File prune is more complicated than expand.
+ * First we need to read the latest atom to not lose info
+ * needed for proper update. Also we need to make sure that
+ * every component of read-prune-write sequence leaves data
+ * consistent
+ *
+ * Non-cblock aligned prune is performed as read-prune-write
+ * sequence:
+ *
+ * 1) read the latest atom;
+ * 2) perform cblock-aligned prune
+ * 3) issue a write request for the end-of-file
+ */
+int32_t prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ int32_t ret;
+
+ ret = prepare_for_prune(frame, this, offset);
+ if (ret)
+ return ret;
+ return read_prune_write(frame, this);
+}
+
+int32_t expand_file(call_frame_t *frame, xlator_t *this,
+ uint64_t offset)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+
+ ret = prepare_for_submit_hole(frame, this,
+ local->old_file_size,
+ offset - local->old_file_size);
+ if (ret)
+ return ret;
+ submit_hole(frame, this);
+ return 0;
+}
+
+static int32_t ftruncate_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *dict)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->prebuf = *buf;
+ local->postbuf = *buf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->cur_file_size;
+
+ get_one_call(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+static int32_t do_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ if (local->data_conf.orig_offset == local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG,
+ "trivial ftruncate (current file size %llu)",
+ (unsigned long long)local->cur_file_size);
+#endif
+ goto trivial;
+ }
+ else if (local->data_conf.orig_offset < local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = prune_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ else {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = expand_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ if (op_errno)
+ goto error;
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ ftruncate_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ /*
+ * finish with ftruncate
+ */
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+static int32_t crypt_ftruncate_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_ftruncate,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * ftruncate is performed in 2 steps:
+ * . recieve file size;
+ * . expand or prune file.
+ */
+static int32_t crypt_ftruncate(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ local->data_conf.orig_offset = offset;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_ftruncate_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+/* ->flush_cbk() */
+int32_t truncate_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local->xdata);
+ return 0;
+}
+
+/* ftruncate_cbk() */
+int32_t truncate_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *fd = local->fd;
+ local->prebuf = *prebuf;
+ local->postbuf = *postbuf;
+
+ STACK_WIND(frame,
+ truncate_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd,
+ NULL);
+ fd_unref(fd);
+ return 0;
+}
+
+/*
+ * is called as ->open_cbk()
+ */
+static int32_t truncate_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0) {
+ fd_unref(fd);
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno, NULL, NULL, NULL);
+ return 0;
+ }
+ /*
+ * crypt_truncate() is implemented via crypt_ftruncate(),
+ * so the crypt xlator does STACK_WIND to itself here
+ */
+ STACK_WIND(frame,
+ truncate_flush,
+ this,
+ this->fops->ftruncate, /* crypt_ftruncate */
+ fd,
+ local->offset,
+ NULL);
+ return 0;
+}
+
+/*
+ * crypt_truncate() is implemented via crypt_ftruncate() as a
+ * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
+ */
+int32_t crypt_truncate(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset,
+ dict_t *xdata)
+{
+ fd_t *fd;
+ crypt_local_t *local;
+
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "truncate file %s at offset %llu",
+ loc->path, (unsigned long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
+ if (!local)
+ goto error;
+
+ fd = fd_create(loc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->offset = offset;
+ local->xdata = xdata;
+ STACK_WIND(frame,
+ truncate_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ loc,
+ O_RDWR,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_WRITE:
+ return end_writeback_writev;
+ case GF_FOP_FTRUNCATE:
+ return end_writeback_ftruncate;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
+ return NULL;
+ }
+}
+
+/*
+ * true, if the caller needs metadata string
+ */
+static int32_t is_custom_mtd(dict_t *xdata)
+{
+ data_t *data;
+ uint32_t flags;
+
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, MSGFLAGS_PREFIX);
+ if (!data)
+ return 0;
+ if (data->len != sizeof(uint32_t)) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Bad msgflags size (%d)", data->len);
+ return -1;
+ }
+ flags = *((uint32_t *)data->data);
+ return msgflags_check_mtd_lock(&flags);
+}
+
+static int32_t crypt_open_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ if (op_ret < 0)
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ put_one_call_open(frame);
+ return 0;
+}
+
+static void crypt_open_tail(call_frame_t *frame, xlator_t *this)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_open_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+}
+
+/*
+ * load private inode info at open time
+ * called as ->fgetxattr_cbk()
+ */
+static int load_mtd_open(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret;
+ gf_boolean_t upload_info;
+ data_t *mtd;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ /*
+ * first, check for cached info
+ */
+ ret = inode_ctx_get(local->fd->inode, this, &value);
+ if (ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Inode info expected, but not found");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * info has been found in the cache
+ */
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info hasn't been found in the cache.
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto exit;
+ }
+ init_inode_info_head(info, local->fd);
+ upload_info = _gf_true;
+ }
+ /*
+ * extract metadata
+ */
+ mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
+ if (!mtd) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Format string wasn't found");
+ goto exit;
+ }
+ /*
+ * authenticate metadata against the path
+ */
+ ret = open_format((unsigned char *)mtd->data,
+ mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ if (upload_info) {
+ ret = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ ret = inode_ctx_put(local->fd->inode,
+ this, (uint64_t)(long)info);
+ if (ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ }
+ if (local->custom_mtd) {
+ /*
+ * pass the metadata string to the customer
+ */
+ ret = dict_set_static_bin(local->xdata,
+ CRYPTO_FORMAT_PREFIX,
+ mtd->data,
+ mtd->len);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ }
+ exit:
+ if (!local->custom_mtd)
+ crypt_open_tail(frame, this);
+ else
+ put_one_call_open(frame);
+ return 0;
+}
+
+static int32_t crypt_open_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ load_mtd_open,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ CRYPTO_FORMAT_PREFIX,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+/*
+ * verify metadata against the specified pathname
+ */
+static int32_t crypt_open_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ else if (local->custom_mtd){
+ local->xdata = dict_new();
+ if (!local->xdata) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_ERROR,
+ "Can not get new dict for mtd string");
+ goto exit;
+ }
+ }
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_open_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+static int32_t crypt_open(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc) {
+ ret = ENOMEM;
+ goto error;
+ }
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ ret = is_custom_mtd(xdata);
+ if (ret < 0) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ ret = EINVAL;
+ goto error;
+ }
+ local->custom_mtd = ret;
+
+ if ((flags & O_ACCMODE) == O_WRONLY)
+ /*
+ * we can't open O_WRONLY, because
+ * we need to do read-modify-write
+ */
+ flags = (flags & ~O_ACCMODE) | O_RDWR;
+ /*
+ * Make sure that out translated offsets
+ * and counts won't be ignored
+ */
+ flags &= ~O_APPEND;
+ get_one_call_nolock(frame);
+ STACK_WIND(frame,
+ crypt_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc,
+ flags,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(open,
+ frame,
+ -1,
+ ret,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct object_cipher_info *object = &info->cinfo;
+
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
+ uuid_utoa(info->oid));
+#endif
+ ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
+ master);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Init inode info at ->create() time
+ */
+static void init_inode_info_create(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ data_t *data)
+{
+ struct object_cipher_info *object;
+
+ info->nr_minor = CRYPT_XLATOR_ID;
+ memcpy(info->oid, data->data, data->len);
+
+ object = &info->cinfo;
+
+ object->o_alg = master->m_alg;
+ object->o_mode = master->m_mode;
+ object->o_block_bits = master->m_block_bits;
+ object->o_dkey_size = master->m_dkey_size;
+}
+
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
+{
+ memcpy(info->oid, fd->inode->gfid, sizeof(uuid_t));
+}
+
+static int32_t crypt_create_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_private_t *priv = this->private;
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0) {
+ free_inode_info(info);
+ goto unwind;
+ }
+ op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (op_errno) {
+ op_ret = -1;
+ free_inode_info(info);
+ goto unwind;
+ }
+ /*
+ * FIXME: drop major subversion number
+ */
+ op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
+ if (op_ret == -1) {
+ op_errno = EIO;
+ free_inode_info(info);
+ goto unwind;
+ }
+ unwind:
+ free_format(local);
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int crypt_create_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ dict_unref(local->xattr);
+
+ if (op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_create_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(local->info);
+ free_format(local);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int32_t crypt_create_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+
+ STACK_WIND(frame,
+ crypt_create_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+ if (local->xdata)
+ dict_unref(local->xdata);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * Create and store crypt-specific format on disk;
+ * Populate cache with private inode info
+ */
+static int32_t crypt_create_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_create_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t crypt_create(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ mode_t mode,
+ mode_t umask,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ int ret;
+ data_t *data;
+ crypt_local_t *local;
+ crypt_private_t *priv;
+ struct master_cipher_info *master;
+ struct crypt_inode_info *info;
+
+ priv = this->private;
+ master = get_master_cinfo(priv);
+
+ if (master_alg_atomic(master)) {
+ /*
+ * We can't open O_WRONLY, because we
+ * need to do read-modify-write.
+ */
+ if ((flags & O_ACCMODE) == O_WRONLY)
+ flags = (flags & ~O_ACCMODE) | O_RDWR;
+ /*
+ * Make sure that out translated offsets
+ * and counts won't be ignored
+ */
+ flags &= ~O_APPEND;
+ }
+ local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ data = dict_get(xdata, "gfid-req");
+ if (!data) {
+ ret = EINVAL;
+ gf_log("crypt", GF_LOG_WARNING, "gfid not found");
+ goto error;
+ }
+ if (data->len != sizeof(uuid_t)) {
+ ret = EINVAL;
+ gf_log("crypt", GF_LOG_WARNING,
+ "bad gfid size (%d), should be %d",
+ (int)data->len, (int)sizeof(uuid_t));
+ goto error;
+ }
+ info = alloc_inode_info(local, loc);
+ if (!info){
+ ret = ENOMEM;
+ goto error;
+ }
+ /*
+ * NOTE:
+ * format has to be created BEFORE
+ * proceeding to the untrusted server
+ */
+ ret = alloc_format_create(local);
+ if (ret) {
+ free_inode_info(info);
+ goto error;
+ }
+ init_inode_info_create(info, master, data);
+
+ ret = create_format(local->format,
+ loc,
+ info,
+ master);
+ if (ret) {
+ free_inode_info(info);
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ ret = dict_set_static_bin(local->xattr,
+ CRYPTO_FORMAT_PREFIX,
+ local->format,
+ new_format_size());
+ if (ret) {
+ dict_unref(local->xattr);
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ dict_unref(local->xattr);
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ STACK_WIND(frame,
+ crypt_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc,
+ flags,
+ mode,
+ umask,
+ fd,
+ xdata);
+ return 0;
+ error:
+ gf_log("crypt", GF_LOG_WARNING, "can not create file");
+ STACK_UNWIND_STRICT(create,
+ frame,
+ -1,
+ ret,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * FIXME: this should depends on the version of format string
+ */
+static int32_t filter_crypt_xattr(dict_t *dict,
+ char *key, data_t *value, void *data)
+{
+ dict_del(dict, key);
+ return 0;
+}
+
+static int32_t crypt_fsetxattr(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * TBD: verify file metadata before wind
+ */
+static int32_t crypt_setxattr(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t linkop_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0 &&
+ op_errno == ENOENT &&
+ local->loc->inode->ia_type == IA_IFLNK) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * unpin inode on the server
+ */
+static int32_t link_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ link_unwind(frame);
+ return 0;
+}
+
+void link_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ inode_t *inode;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(link,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ inode = local->inode;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc) {
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(link,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (inode)
+ inode_unref(inode);
+}
+
+void link_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ link_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+/*
+ * unlink()
+ */
+static int32_t unlink_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unlink_unwind(frame);
+ return 0;
+}
+
+void unlink_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+}
+
+void unlink_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ unlink_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ local->loc,
+ local->flags,
+ local->xdata);
+}
+
+void rename_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ prenewparent = local->prenewparent;
+ postnewparent = local->postnewparent;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc){
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ prenewparent,
+ postnewparent,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (prenewparent)
+ GF_FREE(prenewparent);
+ if (postnewparent)
+ GF_FREE(postnewparent);
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t rename_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ rename_unwind(frame);
+ return 0;
+}
+
+static int32_t rename_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ local->buf = *buf;
+ local->prebuf = *preoldparent;
+ local->postbuf = *postoldparent;
+ if (prenewparent) {
+ local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
+ gf_crypt_mt_iatt);
+ if (!local->prenewparent) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+ *local->prenewparent = *prenewparent;
+ }
+ if (postnewparent) {
+ local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
+ gf_crypt_mt_iatt);
+ if (!local->postnewparent) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+ *local->postnewparent = *postnewparent;
+ }
+ STACK_WIND(frame,
+ rename_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ rename_unwind(frame);
+ return 0;
+}
+
+void rename_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ rename_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+static int32_t __do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_wind_handler_t wind_fn;
+ linkop_unwind_handler_t unwind_fn;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret >= 0)
+ wind_fn(frame, this);
+ else {
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ unwind_fn(frame);
+ }
+ return 0;
+}
+
+static int32_t do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if(op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * Update the metadata string (against the new pathname);
+ * submit the result
+ */
+static int32_t linkop_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ gf_boolean_t upload_info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+ struct crypt_inode_info *info;
+ data_t *old_mtd;
+ uint32_t new_mtd_size;
+ uint64_t value = 0;
+ void (*unwind_fn)(call_frame_t *frame);
+ void (*wind_fn)(call_frame_t *frame, xlator_t *this);
+ mtd_op_t mop;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ mop = linkop_mtdop_dispatch(local->fop);
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto wind;
+ if (op_ret < 0)
+ /*
+ * verification failed
+ */
+ goto error;
+
+ old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
+ if (!old_mtd) {
+ op_errno = EIO;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Metadata string wasn't found");
+ goto error;
+ }
+ new_mtd_size = format_size(mop, old_mtd->len);
+ op_errno = alloc_format(local, new_mtd_size);
+ if (op_errno)
+ goto error;
+ /*
+ * check for cached info
+ */
+ op_ret = inode_ctx_get(fd->inode, this, &value);
+ if (op_ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Inode info was not found");
+ op_errno = EINVAL;
+ goto error;
+ }
+ /*
+ * info was found in the cache
+ */
+ local->info = info;
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info wasn't found in the cache;
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info)
+ goto error;
+ init_inode_info_head(info, fd);
+ local->info = info;
+ upload_info = _gf_true;
+ }
+ op_errno = open_format((unsigned char *)old_mtd->data,
+ old_mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (op_errno)
+ goto error;
+ if (upload_info == _gf_true) {
+ op_errno = init_inode_info_tail(info,
+ get_master_cinfo(priv));
+ if (op_errno)
+ goto error;
+ op_errno = inode_ctx_put(fd->inode, this,
+ (uint64_t)(long)(info));
+ if (op_errno == -1) {
+ op_errno = EIO;
+ goto error;
+ }
+ }
+ /*
+ * update the format string (append/update/cup a MAC)
+ */
+ op_errno = update_format(local->format,
+ (unsigned char *)old_mtd->data,
+ old_mtd->len,
+ local->mac_idx,
+ mop,
+ local->newloc,
+ info,
+ get_master_cinfo(priv),
+ local);
+ if (op_errno)
+ goto error;
+ /*
+ * store the new format string on the server
+ */
+ if (new_mtd_size) {
+ op_errno = dict_set_static_bin(local->xattr,
+ CRYPTO_FORMAT_PREFIX,
+ local->format,
+ new_mtd_size);
+ if (op_errno)
+ goto error;
+ }
+ STACK_WIND(frame,
+ do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ local->loc,
+ local->xattr,
+ 0,
+ NULL);
+ return 0;
+ wind:
+ wind_fn(frame, this);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unwind_fn(frame);
+ return 0;
+}
+
+static int32_t linkop_grab_local(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc,
+ int flags, dict_t *xdata,
+ glusterfs_fop_t op)
+{
+ int32_t ret = ENOMEM;
+ fd_t *fd;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, op);
+ if (!local)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ fd = fd_create(oldloc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->flags = flags;
+ local->loc = GF_CALLOC(1, sizeof(*oldloc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, oldloc);
+ if (ret) {
+ GF_FREE(local->loc);
+ local->loc = NULL;
+ goto error;
+ }
+ if (newloc) {
+ local->newloc = GF_CALLOC(1, sizeof(*newloc), gf_crypt_mt_loc);
+ if (!local->newloc) {
+ GF_FREE(local->loc);
+ loc_wipe(local->loc);
+ goto error;
+ }
+ memset(local->newloc, 0, sizeof(*local->newloc));
+ ret = loc_copy(local->newloc, newloc);
+ if (ret) {
+ GF_FREE(local->loc);
+ loc_wipe(local->loc);
+ GF_FREE(local->newloc);
+ goto error;
+ }
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+ ret = ENOMEM;
+ goto error;
+ }
+ return 0;
+ error:
+ if (local->xdata)
+ dict_unref(local->xdata);
+ if (local->fd)
+ fd_unref(local->fd);
+ local->fd = 0;
+ local->loc = NULL;
+ local->newloc = NULL;
+
+ local->op_ret = -1;
+ local->op_errno = ret;
+
+ return ret;
+}
+
+/*
+ * read and verify locked metadata against the old pathname (via open);
+ * update the metadata string in accordance with the new pathname;
+ * submit modified metadata;
+ * wind;
+ */
+static int32_t linkop(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc,
+ int flags,
+ dict_t *xdata,
+ glusterfs_fop_t op)
+{
+ int32_t ret;
+ dict_t *dict;
+ crypt_local_t *local;
+ void (*unwind_fn)(call_frame_t *frame);
+
+ unwind_fn = linkop_unwind_dispatch(op);
+
+ ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
+ local = frame->local;
+ if (ret)
+ goto error;
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+ ret = ENOMEM;
+ goto error;
+ }
+ /*
+ * Set a message to crypt_open() that we need
+ * locked metadata string.
+ * All link operations (link, unlink, rename)
+ * need write lock
+ */
+ msgflags_set_mtd_wlock(&local->msgflags);
+ ret = dict_set_static_bin(dict,
+ MSGFLAGS_PREFIX,
+ &local->msgflags,
+ sizeof(local->msgflags));
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
+ dict_unref(dict);
+ goto error;
+ }
+ /*
+ * verify metadata against the old pathname
+ * and retrieve locked metadata string
+ */
+ STACK_WIND(frame,
+ linkop_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ oldloc,
+ O_RDWR,
+ local->fd,
+ dict);
+ dict_unref(dict);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = ret;
+ unwind_fn(frame);
+ return 0;
+}
+
+static int32_t crypt_link(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
+}
+
+static int32_t crypt_unlink(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
+{
+ return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
+}
+
+static int32_t crypt_rename(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
+}
+
+static void put_one_call_open(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ fd_t *fd = local->fd;
+ loc_t *loc = local->loc;
+ dict_t *xdata = local->xdata;
+
+ STACK_UNWIND_STRICT(open,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ fd,
+ xdata);
+ fd_unref(fd);
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(loc);
+ GF_FREE(loc);
+ }
+}
+
+static int32_t __crypt_readv_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ /* read deals with data configs only */
+ struct iovec *avec = local->data_conf.avec;
+ char **pool = local->data_conf.pool;
+ int blocks_in_pool = local->data_conf.blocks_in_pool;
+ struct iobref *iobref = local->iobref;
+ struct iobref *iobref_data = local->iobref_data;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "readv unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ dump_plain_text(local, avec);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
+ (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
+ (int)(local->rw_count > 0 ? iovec_get_size(avec, local->data_conf.acount) : 0),
+ (unsigned long long)local->buf.ia_size);
+
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ local->rw_count > 0 ? local->rw_count : local->op_ret,
+ local->op_errno,
+ avec,
+ avec ? local->data_conf.acount : 0,
+ &local->buf,
+ local->iobref,
+ local_xdata);
+
+ free_avec(avec, pool, blocks_in_pool);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobref)
+ iobref_unref(iobref);
+ if (iobref_data)
+ iobref_unref(iobref_data);
+ return 0;
+}
+
+static void crypt_readv_done(call_frame_t *frame, xlator_t *this)
+{
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_readv_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+}
+
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local))
+ crypt_readv_done(frame, this);
+}
+
+static int32_t __crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ int32_t ret_to_user;
+
+ if (local->xattr)
+ dict_unref(local->xattr);
+ /*
+ * Calculate amout of butes to be returned
+ * to user. We need to subtract paddings that
+ * have been written as a part of atom.
+ */
+ /*
+ * subtract head padding
+ */
+ if (local->rw_count == 0)
+ /*
+ * Nothing has been written, it must be an error
+ */
+ ret_to_user = local->op_ret;
+ else if (local->rw_count <= local->data_conf.off_in_head) {
+ gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
+ ret_to_user = 0;
+ }
+ else
+ ret_to_user = local->rw_count -
+ local->data_conf.off_in_head;
+ /*
+ * subtract tail padding
+ */
+ if (ret_to_user > local->data_conf.orig_size)
+ ret_to_user = local->data_conf.orig_size;
+
+ if (local->iobref)
+ iobref_unref(local->iobref);
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "writev: ret_to_user: %d", ret_to_user);
+
+ STACK_UNWIND_STRICT(writev,
+ frame,
+ ret_to_user,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int32_t crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+ return 0;
+}
+
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ crypt_writev_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ }
+ else
+ crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+static int32_t __crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ char *iobase = local->vec.iov_base;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "ftruncate unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "ftruncate, return to user: presize=%llu, postsize=%llu",
+ (unsigned long long)local->prebuf.ia_size,
+ (unsigned long long)local->postbuf.ia_size);
+
+ STACK_UNWIND_STRICT(ftruncate,
+ frame,
+ local->op_ret < 0 ? -1 : 0,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobase)
+ GF_FREE(iobase);
+ return 0;
+}
+
+static int32_t crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ dict_unref(local->xattr);
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+}
+
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ crypt_ftruncate_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ }
+ else
+ crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+/*
+ * load regular file size for some FOPs
+ */
+static int32_t load_file_size(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0)
+ goto unwind;
+ /*
+ * load regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ if (local->xdata)
+ dict_unref(local->xdata);
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ }
+ local->buf.ia_size = data_to_uint64(data);
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "FOP %d: Translate regular file to %llu",
+ local->fop,
+ (unsigned long long)local->buf.ia_size);
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata);
+ break;
+ case GF_FOP_LOOKUP:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? local->inode : NULL,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata,
+ op_ret >= 0 ? &local->postbuf : NULL);
+ break;
+ case GF_FOP_READ:
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ op_ret,
+ op_errno,
+ NULL,
+ 0,
+ op_ret >= 0 ? &local->buf : NULL,
+ NULL,
+ NULL);
+ break;
+ default:
+ gf_log(this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (local_inode)
+ inode_unref(local_inode);
+ return 0;
+}
+
+static int32_t crypt_stat_common_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->buf = *buf;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+}
+
+static int32_t crypt_fstat(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
+ if (!local)
+ goto error;
+ local->fd = fd_ref(fd);
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t crypt_stat(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_STAT);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t crypt_lookup_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->postbuf = *postparent;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ uuid_copy(local->loc->gfid, buf->ia_gfid);
+
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ unwind:
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
+ return 0;
+}
+
+static int32_t crypt_lookup(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
+ STACK_WIND(frame,
+ crypt_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * for every regular directory entry find its real file size
+ * and update stat's buf properly
+ */
+static int32_t crypt_readdirp_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry (entry, (&entries->list), list) {
+ data_t *data;
+
+ if (!IA_ISREG(entry->d_stat.ia_type))
+ continue;
+ data = dict_get(entry->dict, FSIZE_XATTR_PREFIX);
+ if (!data){
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size of direntry not found");
+ op_errno = EIO;
+ op_ret = -1;
+ break;
+ }
+ entry->d_stat.ia_size = data_to_uint64(data);
+ }
+ unwind:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+/*
+ * ->readdirp() fills in-core inodes, so we need to set proper
+ * file sizes for all directory entries of the parent @fd.
+ * Actual updates take place in ->crypt_readdirp_cbk()
+ */
+static int32_t crypt_readdirp(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto error;
+ }
+ else
+ dict_ref(xdata);
+ /*
+ * make sure that we'll have real file sizes at ->readdirp_cbk()
+ */
+ ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ dict_unref(xdata);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd,
+ size,
+ offset,
+ xdata);
+ dict_unref(xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ret, NULL, NULL);
+ return 0;
+}
+
+static int32_t crypt_access(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ gf_log(this->name, GF_LOG_WARNING,
+ "NFS mounts of encrypted volumes are unsupported");
+ STACK_UNWIND_STRICT(access, frame, -1, EPERM, NULL);
+ return 0;
+}
+
+int32_t master_set_block_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ uint64_t block_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("block-size", block_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("block-size", block_size, size, error);
+
+ switch (block_size) {
+ case 512:
+ master->m_block_bits = 9;
+ break;
+ case 1024:
+ master->m_block_bits = 10;
+ break;
+ case 2048:
+ master->m_block_bits = 11;
+ break;
+ case 4096:
+ master->m_block_bits = 12;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: unsupported block size %llu",
+ (unsigned long long)block_size);
+ goto error;
+ }
+ return 0;
+ error:
+ return -1;
+}
+
+int32_t master_set_alg(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_alg = AES_CIPHER_ALG;
+ return 0;
+}
+
+int32_t master_set_mode(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_mode = XTS_CIPHER_MODE;
+ return 0;
+}
+
+/*
+ * set key size in bits to the master info
+ * Pre-conditions: cipher mode in the master info is uptodate.
+ */
+static int master_set_data_key_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ int32_t ret;
+ uint64_t key_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("data-key-size", key_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("data-key-size", key_size, size, error);
+
+ ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: wrong bin key size %llu for alg %d mode %d",
+ (unsigned long long)key_size,
+ (int)master->m_alg,
+ (int)master->m_mode);
+ goto error;
+ }
+ master->m_dkey_size = key_size;
+ return 0;
+ error:
+ return -1;
+}
+
+static int is_hex(char *s) {
+ return ('0' <= *s && *s <= '9') || ('a' <= *s && *s <= 'f');
+}
+
+static int parse_hex_buf(xlator_t *this, char *src, unsigned char *dst,
+ int hex_size)
+{
+ int i;
+ int hex_byte = 0;
+
+ for (i = 0; i < (hex_size / 2); i++) {
+ if (!is_hex(src + i*2) || !is_hex(src + i*2 + 1)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: not hex symbol in key");
+ return -1;
+ }
+ if (sscanf(src + i*2, "%2x", &hex_byte) != 1) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: can not parse hex key");
+ return -1;
+ }
+ dst[i] = hex_byte & 0xff;
+ }
+ return 0;
+}
+
+/*
+ * Parse options;
+ * install master volume key
+ */
+int32_t master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ int32_t ret;
+ FILE *file = NULL;
+
+ int32_t key_size;
+ char *opt_key_file_pathname = NULL;
+
+ unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
+ char hex_buf[2 * MASTER_VOL_KEY_SIZE];
+
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ /*
+ * extract master key passed via option
+ */
+ GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
+
+ if (!opt_key_file_pathname) {
+ gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
+ return -1;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
+ opt_key_file_pathname);
+
+ file = fopen(opt_key_file_pathname, "r");
+ if (file == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: can not open file with master key");
+ return -1;
+ }
+ /*
+ * extract hex key
+ */
+ key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
+ if (key_size < sizeof(hex_buf)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: master key is too short");
+ goto bad_key;
+ }
+ ret = parse_hex_buf(this, hex_buf, bin_buf, key_size);
+ if (ret)
+ goto bad_key;
+ memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
+ memset(hex_buf, 0, sizeof(hex_buf));
+ fclose(file);
+
+ memset(bin_buf, 0, sizeof(bin_buf));
+ return 0;
+ bad_key:
+ gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
+ if (file)
+ fclose(file);
+ memset(bin_buf, 0, sizeof(bin_buf));
+ return -1;
+}
+
+/*
+ * Derive volume key for object-id authentication
+ */
+int32_t master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ return get_nmtd_vol_key(get_master_cinfo(priv));
+}
+
+int32_t crypt_init_xlator(xlator_t *this)
+{
+ int32_t ret;
+ crypt_private_t *priv = this->private;
+
+ ret = master_set_alg(this, priv);
+ if (ret)
+ return ret;
+ ret = master_set_mode(this, priv);
+ if (ret)
+ return ret;
+ ret = master_set_block_size(this, priv, NULL);
+ if (ret)
+ return ret;
+ ret = master_set_data_key_size(this, priv, NULL);
+ if (ret)
+ return ret;
+ ret = master_set_master_vol_key(this, priv);
+ if (ret)
+ return ret;
+ return master_set_nmtd_vol_key(this, priv);
+}
+
+static int32_t crypt_alloc_private(xlator_t *this)
+{
+ this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
+ if (!this->private) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Can not allocate memory for private data");
+ return ENOMEM;
+ }
+ return 0;
+}
+
+static void crypt_free_private(xlator_t *this)
+{
+ crypt_private_t *priv = this->private;
+ if (priv) {
+ memset(priv, 0, sizeof(*priv));
+ GF_FREE(priv);
+ }
+}
+
+int32_t reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ crypt_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("crypt", this, error);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, error);
+ GF_VALIDATE_OR_GOTO (this->name, options, error);
+
+ priv = this->private;
+
+ ret = master_set_block_size(this, priv, options);
+ if (ret) {
+ gf_log("this->name", GF_LOG_ERROR,
+ "Failed to reconfure block size");
+ goto error;
+ }
+ ret = master_set_data_key_size(this, priv, options);
+ if (ret) {
+ gf_log("this->name", GF_LOG_ERROR,
+ "Failed to reconfure data key size");
+ goto error;
+ }
+ return 0;
+ error:
+ return ret;
+}
+
+int32_t init(xlator_t *this)
+{
+ int32_t ret;
+
+ if (!this->children || this->children->next) {
+ gf_log ("crypt", GF_LOG_ERROR,
+ "FATAL: crypt should have exactly one child");
+ return EINVAL;
+ }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+ ret = crypt_alloc_private(this);
+ if (ret)
+ return ret;
+ ret = crypt_init_xlator(this);
+ if (ret)
+ goto error;
+ this->local_pool = mem_pool_new(crypt_local_t, 64);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ ret = ENOMEM;
+ goto error;
+ }
+ gf_log ("crypt", GF_LOG_INFO, "crypt xlator loaded");
+ return 0;
+ error:
+ crypt_free_private(this);
+ return ret;
+}
+
+void fini (xlator_t *this)
+{
+ crypt_free_private(this);
+}
+
+struct xlator_fops fops = {
+ .readv = crypt_readv,
+ .writev = crypt_writev,
+ .truncate = crypt_truncate,
+ .ftruncate = crypt_ftruncate,
+ .setxattr = crypt_setxattr,
+ .fsetxattr = crypt_fsetxattr,
+ .link = crypt_link,
+ .unlink = crypt_unlink,
+ .rename = crypt_rename,
+ .open = crypt_open,
+ .create = crypt_create,
+ .stat = crypt_stat,
+ .fstat = crypt_fstat,
+ .lookup = crypt_lookup,
+ .readdirp = crypt_readdirp,
+ .access = crypt_access
+};
+
+struct xlator_cbks cbks = {
+ .forget = crypt_forget
+};
+
+struct volume_options options[] = {
+ { .key = {"master-key"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "Pathname of regular file which contains master volume key"
+ },
+ { .key = {"data-key-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Data key size (bits)",
+ .min = 256,
+ .max = 512,
+ .default_value = "256",
+ },
+ { .key = {"block-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Atom size (bits)",
+ .min = 512,
+ .max = 4096,
+ .default_value = "4096"
+ },
+ { .key = {NULL} },
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
new file mode 100644
index 000000000..01a8542ab
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.h
@@ -0,0 +1,899 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_H__
+#define __CRYPT_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+#include <openssl/sha.h>
+#include <openssl/hmac.h>
+#include <openssl/cmac.h>
+#include <openssl/modes.h>
+#include "crypt-mem-types.h"
+
+#define CRYPT_XLATOR_ID (0)
+
+#define MAX_IOVEC_BITS (3)
+#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
+#define KEY_FACTOR_BITS (6)
+
+#define DEBUG_CRYPT (0)
+#define TRIVIAL_TFM (0)
+
+#define CRYPT_MIN_BLOCK_BITS (9)
+#define CRYPT_MAX_BLOCK_BITS (12)
+
+#define MASTER_VOL_KEY_SIZE (32)
+#define NMTD_VOL_KEY_SIZE (16)
+
+struct crypt_key {
+ uint32_t len;
+ const char *label;
+};
+
+/*
+ * Add new key types to the end of this
+ * enumeration but before LAST_KEY_TYPE
+ */
+typedef enum {
+ MASTER_VOL_KEY,
+ NMTD_VOL_KEY,
+ NMTD_LINK_KEY,
+ EMTD_FILE_KEY,
+ DATA_FILE_KEY_256,
+ DATA_FILE_KEY_512,
+ LAST_KEY_TYPE
+}crypt_key_type;
+
+struct kderive_context {
+ const unsigned char *pkey;/* parent key */
+ uint32_t pkey_len; /* parent key size, bits */
+ uint32_t ckey_len; /* child key size, bits */
+ unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
+ uint32_t fid_len; /* fid len, bytes */
+ unsigned char *out; /* contains child keying material */
+ uint32_t out_len; /* out len, bytes */
+};
+
+typedef enum {
+ DATA_ATOM,
+ HOLE_ATOM,
+ LAST_DATA_TYPE
+}atom_data_type;
+
+typedef enum {
+ HEAD_ATOM,
+ TAIL_ATOM,
+ FULL_ATOM,
+ LAST_LOCALITY_TYPE
+}atom_locality_type;
+
+typedef enum {
+ MTD_CREATE,
+ MTD_APPEND,
+ MTD_OVERWRITE,
+ MTD_CUT,
+ MTD_LAST_OP
+} mtd_op_t;
+
+struct xts128_context {
+ void *key1, *key2;
+ block128_f block1,block2;
+};
+
+struct object_cipher_info {
+ cipher_alg_t o_alg;
+ cipher_mode_t o_mode;
+ uint32_t o_block_bits;
+ uint32_t o_dkey_size; /* raw data key size in bits */
+ union {
+ struct {
+ unsigned char ivec[16];
+ AES_KEY dkey[2];
+ AES_KEY tkey; /* key used for tweaking */
+ XTS128_CONTEXT xts;
+ } aes_xts;
+ } u;
+};
+
+struct master_cipher_info {
+ /*
+ * attributes inherited by newly created regular files
+ */
+ cipher_alg_t m_alg;
+ cipher_mode_t m_mode;
+ uint32_t m_block_bits;
+ uint32_t m_dkey_size; /* raw key size in bits */
+ /*
+ * master key
+ */
+ unsigned char m_key[MASTER_VOL_KEY_SIZE];
+ /*
+ * volume key for oid authentication
+ */
+ unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
+};
+
+/*
+* This info is not changed during file's life
+ */
+struct crypt_inode_info {
+#if DEBUG_CRYPT
+ loc_t *loc; /* pathname that the file has been
+ opened, or created with */
+#endif
+ uint16_t nr_minor;
+ uuid_t oid;
+ struct object_cipher_info cinfo;
+};
+
+/*
+ * this should locate in secure memory
+ */
+typedef struct {
+ struct master_cipher_info master;
+} crypt_private_t;
+
+static inline struct master_cipher_info *get_master_cinfo(crypt_private_t *priv)
+{
+ return &priv->master;
+}
+
+static inline struct object_cipher_info *get_object_cinfo(struct crypt_inode_info
+ *info)
+{
+ return &info->cinfo;
+}
+
+/*
+ * this describes layouts and properties
+ * of atoms in an aligned vector
+ */
+struct avec_config {
+ uint32_t atom_size;
+ atom_data_type type;
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head;
+ uint32_t off_in_tail;
+ uint32_t gap_in_tail;
+ uint32_t nr_full_blocks;
+
+ struct iovec *avec; /* aligned vector */
+ uint32_t acount; /* number of avec components. The same
+ * as number of occupied logical blocks */
+ char **pool;
+ uint32_t blocks_in_pool;
+ uint32_t cursor; /* makes sense only for ordered writes,
+ * so there is no races on this counter.
+ *
+ * Cursor is per-config object, we don't
+ * reset cursor for atoms of different
+ * localities (head, tail, full)
+ */
+};
+
+
+typedef struct {
+ glusterfs_fop_t fop; /* code of FOP this local info built for */
+ fd_t *fd;
+ inode_t *inode;
+ loc_t *loc;
+ int32_t mac_idx;
+ loc_t *newloc;
+ int32_t flags;
+ int32_t wbflags;
+ struct crypt_inode_info *info;
+ struct iobref *iobref;
+ struct iobref *iobref_data;
+ off_t offset;
+
+ uint64_t old_file_size; /* per FOP, retrieved under lock held */
+ uint64_t cur_file_size; /* per iteration, before issuing IOs */
+ uint64_t new_file_size; /* per iteration, after issuing IOs */
+
+ uint64_t io_offset; /* offset of IOs issued per iteration */
+ uint64_t io_offset_nopad; /* offset of user's data in the atom */
+ uint32_t io_size; /* size of IOs issued per iteration */
+ uint32_t io_size_nopad; /* size of user's data in the IOs */
+ uint32_t eof_padding_size; /* size od EOF padding in the IOs */
+
+ gf_lock_t call_lock; /* protect nr_calls from many cbks */
+ int32_t nr_calls;
+
+ atom_data_type active_setup; /* which setup (hole or date)
+ is currently active */
+ /* data setup */
+ struct avec_config data_conf;
+
+ /* hole setup */
+ int hole_conv_in_proggress;
+ gf_lock_t hole_lock; /* protect hole config from many cbks */
+ int hole_handled;
+ struct avec_config hole_conf;
+ struct iatt buf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t rw_count; /* total read or written */
+ gf_lock_t rw_count_lock; /* protect the counter above */
+ unsigned char *format; /* for create, update format string */
+ uint32_t format_size;
+ uint32_t msgflags; /* messages for crypt_open() */
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iovec vec; /* contains last file's atom for
+ read-prune-write sequence */
+ gf_boolean_t custom_mtd;
+ /*
+ * the next 3 fields are used by readdir and friends
+ */
+ gf_dirent_t *de; /* directory entry */
+ char *de_path; /* pathname of directory entry */
+ uint32_t de_prefix_len; /* lenght of the parent's pathname */
+ gf_dirent_t *entries;
+
+ uint32_t update_disk_file_size:1;
+} crypt_local_t;
+
+/* This represents a (read)modify-write atom */
+struct rmw_atom {
+ atom_locality_type locality;
+ /*
+ * read-modify-write sequence of the atom
+ */
+ int32_t (*rmw)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata);
+ /*
+ * offset of the logical block in a file
+ */
+ loff_t (*offset_at)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * IO offset in an atom
+ */
+ uint32_t (*offset_in)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * number of bytes of plain text of this atom that user
+ * wants to read/write.
+ * It can be smaller than atom_size in the case of head
+ * or tail atoms.
+ */
+ uint32_t (*io_size_nopad)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * which iovec represents the atom
+ */
+ struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
+ /*
+ * how many bytes of partial block should be uptodated by
+ * reading from disk.
+ * This is used to perform a read component of RMW (read-modify-write).
+ */
+ uint32_t (*count_to_uptodate)(call_frame_t *frame, struct object_cipher_info *object);
+ struct avec_config *(*get_config)(call_frame_t *frame);
+};
+
+struct data_cipher_alg {
+ gf_boolean_t atomic; /* true means that algorithm requires
+ to pad data before cipher transform */
+ gf_boolean_t should_pad; /* true means that algorithm requires
+ to pad the end of file with extra-data */
+ uint32_t blkbits; /* blksize = 1 << blkbits */
+ /*
+ * any preliminary sanity checks goes here
+ */
+ int32_t (*init)(void);
+ /*
+ * set alg-mode specific inode info
+ */
+ int32_t (*set_private)(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * check alg-mode specific data key
+ */
+ int32_t (*check_key)(uint32_t key_size);
+ void (*set_iv)(off_t offset, struct object_cipher_info *object);
+ int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
+ size_t length, off_t offset, const int enc,
+ struct object_cipher_info *object);
+};
+
+/*
+ * version-dependent metadata loader
+ */
+struct crypt_mtd_loader {
+ /*
+ * return core format size
+ */
+ size_t (*format_size)(mtd_op_t op, size_t old_size);
+ /*
+ * pack version-specific metadata of an object
+ * at ->create()
+ */
+ int32_t (*create_format)(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * extract version-specific metadata of an object
+ * at ->open() time
+ */
+ int32_t (*open_format)(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info);
+ int32_t (*update_format)(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+};
+
+typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata);
+typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
+typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
+
+
+/* Declarations */
+
+/* keys.c */
+extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
+int32_t get_nmtd_vol_key(struct master_cipher_info *master);
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key);
+/* data.c */
+extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE];
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf);
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count);
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop);
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype);
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t setup_gap_in_tail);
+
+/* metadata.c */
+extern struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER];
+
+int32_t alloc_format(crypt_local_t *local, size_t size);
+int32_t alloc_format_create(crypt_local_t *local);
+void free_format(crypt_local_t *local);
+size_t format_size(mtd_op_t op, size_t old_size);
+size_t new_format_size(void);
+int32_t open_format(unsigned char *str, int32_t len, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master, crypt_local_t *local,
+ gf_boolean_t load_info);
+int32_t update_format(unsigned char *new, unsigned char *old,
+ size_t old_len, int32_t mac_idx, mtd_op_t op, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+
+/* atom.c */
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality);
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype);
+void submit_full(call_frame_t *frame, xlator_t *this);
+
+/* crypt.c */
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop);
+static size_t iovec_get_size(struct iovec *vec, uint32_t count);
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom, off_t io_offset,
+ uint32_t io_size);
+void link_wind(call_frame_t *frame, xlator_t *this);
+void unlink_wind(call_frame_t *frame, xlator_t *this);
+void link_unwind(call_frame_t *frame);
+void unlink_unwind(call_frame_t *frame);
+void rename_wind(call_frame_t *frame, xlator_t *this);
+void rename_unwind(call_frame_t *frame);
+
+/* Inline functions */
+
+static inline size_t iovec_get_size(struct iovec *vec, uint32_t count)
+{
+ int i;
+ size_t size = 0;
+ for (i = 0; i < count; i++)
+ size += vec[i].iov_len;
+ return size;
+}
+
+static inline int32_t crypt_xlator_id(void)
+{
+ return CRYPT_XLATOR_ID;
+}
+
+static inline mtd_loader_id current_mtd_loader(void)
+{
+ return MTD_LOADER_V1;
+}
+
+static inline uint32_t master_key_size (void)
+{
+ return crypt_keys[MASTER_VOL_KEY].len >> 3;
+}
+
+static inline uint32_t nmtd_vol_key_size (void)
+{
+ return crypt_keys[NMTD_VOL_KEY].len >> 3;
+}
+
+static inline uint32_t alg_mode_blkbits(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].blkbits;
+}
+
+static inline uint32_t alg_mode_blksize(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return 1 << alg_mode_blkbits(alg, mode);
+}
+
+static inline gf_boolean_t alg_mode_atomic(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].atomic;
+}
+
+static inline gf_boolean_t alg_mode_should_pad(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].should_pad;
+}
+
+static inline uint32_t master_alg_blksize(struct master_cipher_info *mr)
+{
+ return alg_mode_blksize(mr->m_alg, mr->m_mode);
+}
+
+static inline uint32_t master_alg_blkbits(struct master_cipher_info *mr)
+{
+ return alg_mode_blkbits(mr->m_alg, mr->m_mode);
+}
+
+static inline gf_boolean_t master_alg_atomic(struct master_cipher_info *mr)
+{
+ return alg_mode_atomic(mr->m_alg, mr->m_mode);
+}
+
+static inline gf_boolean_t master_alg_should_pad(struct master_cipher_info *mr)
+{
+ return alg_mode_should_pad(mr->m_alg, mr->m_mode);
+}
+
+static inline uint32_t object_alg_blksize(struct object_cipher_info *ob)
+{
+ return alg_mode_blksize(ob->o_alg, ob->o_mode);
+}
+
+static inline uint32_t object_alg_blkbits(struct object_cipher_info *ob)
+{
+ return alg_mode_blkbits(ob->o_alg, ob->o_mode);
+}
+
+static inline gf_boolean_t object_alg_atomic(struct object_cipher_info *ob)
+{
+ return alg_mode_atomic(ob->o_alg, ob->o_mode);
+}
+
+static inline gf_boolean_t object_alg_should_pad(struct object_cipher_info *ob)
+{
+ return alg_mode_should_pad(ob->o_alg, ob->o_mode);
+}
+
+static inline uint32_t aes_raw_key_size(struct master_cipher_info *master)
+{
+ return master->m_dkey_size >> 3;
+}
+
+static inline struct avec_config *get_hole_conf(call_frame_t *frame)
+{
+ return &(((crypt_local_t *)frame->local)->hole_conf);
+}
+
+static inline struct avec_config *get_data_conf(call_frame_t *frame)
+{
+ return &(((crypt_local_t *)frame->local)->data_conf);
+}
+
+static inline int32_t get_atom_bits (struct object_cipher_info *object)
+{
+ return object->o_block_bits;
+}
+
+static inline int32_t get_atom_size (struct object_cipher_info *object)
+{
+ return 1 << get_atom_bits(object);
+}
+
+static inline int32_t has_head_block(struct avec_config *conf)
+{
+ return conf->off_in_head ||
+ (conf->acount == 1 && conf->off_in_tail);
+}
+
+static inline int32_t has_tail_block(struct avec_config *conf)
+{
+ return conf->off_in_tail && conf->acount > 1;
+}
+
+static inline int32_t has_full_blocks(struct avec_config *conf)
+{
+ return conf->nr_full_blocks;
+}
+
+static inline int32_t should_submit_head_block(struct avec_config *conf)
+{
+ return has_head_block(conf) && (conf->cursor == 0);
+}
+
+static inline int32_t should_submit_tail_block(struct avec_config *conf)
+{
+ return has_tail_block(conf) && (conf->cursor == conf->acount - 1);
+}
+
+static inline int32_t should_submit_full_block(struct avec_config *conf)
+{
+ uint32_t start = has_head_block(conf) ? 1 : 0;
+
+ return has_full_blocks(conf) &&
+ conf->cursor >= start &&
+ conf->cursor < start + conf->nr_full_blocks;
+}
+
+#if DEBUG_CRYPT
+static inline void crypt_check_input_len(size_t len,
+ struct object_cipher_info *object)
+{
+ if (object_alg_should_pad(object) && (len & (object_alg_blksize(object) - 1)))
+ gf_log ("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len);
+}
+
+static inline void check_head_block(struct avec_config *conf)
+{
+ if (!has_head_block(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a head atom");
+}
+
+static inline void check_tail_block(struct avec_config *conf)
+{
+ if (!has_tail_block(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a tail atom");
+}
+
+static inline void check_full_block(struct avec_config *conf)
+{
+ if (!has_full_blocks(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a full atom");
+}
+
+static inline void check_cursor_head(struct avec_config *conf)
+{
+ if (!has_head_block(conf))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Illegal call of head atom method");
+ else if (conf->cursor != 0)
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Cursor (%d) is not at head atom",
+ conf->cursor);
+}
+
+static inline void check_cursor_full(struct avec_config *conf)
+{
+ if (!has_full_blocks(conf))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Illegal call of full atom method");
+ if (has_head_block(conf) && (conf->cursor == 0))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Cursor is not at full atom");
+}
+
+/*
+ * FIXME: use avec->iov_len to check setup
+ */
+static inline int data_local_invariant(crypt_local_t *local)
+{
+ return 0;
+}
+
+#else
+#define crypt_check_input_len(len, object) noop
+#define check_head_block(conf) noop
+#define check_tail_block(conf) noop
+#define check_full_block(conf) noop
+#define check_cursor_head(conf) noop
+#define check_cursor_full(conf) noop
+
+#endif /* DEBUG_CRYPT */
+
+static inline struct avec_config *conf_by_type(call_frame_t *frame,
+ atom_data_type dtype)
+{
+ struct avec_config *conf = NULL;
+
+ switch (dtype) {
+ case HOLE_ATOM:
+ conf = get_hole_conf(frame);
+ break;
+ case DATA_ATOM:
+ conf = get_data_conf(frame);
+ break;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad atom type");
+ }
+ return conf;
+}
+
+static inline uint32_t nr_calls_head(struct avec_config *conf)
+{
+ return has_head_block(conf) ? 1 : 0;
+}
+
+static inline uint32_t nr_calls_tail(struct avec_config *conf)
+{
+ return has_tail_block(conf) ? 1 : 0;
+}
+
+static inline uint32_t nr_calls_full(struct avec_config *conf)
+{
+ switch(conf->type) {
+ case HOLE_ATOM:
+ return has_full_blocks(conf);
+ case DATA_ATOM:
+ return has_full_blocks(conf) ?
+ logical_blocks_occupied(0,
+ conf->nr_full_blocks,
+ MAX_IOVEC_BITS) : 0;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad atom data type");
+ return 0;
+ }
+}
+
+static inline uint32_t nr_calls(struct avec_config *conf)
+{
+ return nr_calls_head(conf) + nr_calls_tail(conf) + nr_calls_full(conf);
+}
+
+static inline uint32_t nr_calls_data(call_frame_t *frame)
+{
+ return nr_calls(get_data_conf(frame));
+}
+
+static inline uint32_t nr_calls_hole(call_frame_t *frame)
+{
+ return nr_calls(get_hole_conf(frame));
+}
+
+static inline void get_one_call_nolock(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+
+ ++local->nr_calls;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "get %d calls", 1);
+}
+
+static inline void get_one_call(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+
+ LOCK(&local->call_lock);
+ get_one_call_nolock(frame);
+ UNLOCK(&local->call_lock);
+}
+
+static inline void get_nr_calls_nolock(call_frame_t *frame, int32_t nr)
+{
+ crypt_local_t *local = frame->local;
+
+ local->nr_calls += nr;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "get %d calls", nr);
+}
+
+static inline void get_nr_calls(call_frame_t *frame, int32_t nr)
+{
+ crypt_local_t *local = frame->local;
+
+ LOCK(&local->call_lock);
+ get_nr_calls_nolock(frame, nr);
+ UNLOCK(&local->call_lock);
+}
+
+static inline int put_one_call(crypt_local_t *local)
+{
+ uint32_t last = 0;
+
+ LOCK(&local->call_lock);
+ if (--local->nr_calls == 0)
+ last = 1;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "put %d calls", 1);
+
+ UNLOCK(&local->call_lock);
+ return last;
+}
+
+static inline int is_appended_write(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->orig_offset + conf->orig_size > local->old_file_size;
+}
+
+static inline int is_ordered_mode(call_frame_t *frame)
+{
+#if 0
+ crypt_local_t *local = frame->local;
+ return local->fop == GF_FOP_FTRUNCATE ||
+ (local->fop == GF_FOP_WRITE && is_appended_write(frame));
+#endif
+ return 1;
+}
+
+static inline int32_t hole_conv_completed(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+ return conf->cursor == conf->acount;
+}
+
+static inline int32_t data_write_in_progress(crypt_local_t *local)
+{
+ return local->active_setup == DATA_ATOM;
+}
+
+static inline int32_t parent_is_crypt_xlator(call_frame_t *frame,
+ xlator_t *this)
+{
+ return frame->parent->this == this;
+}
+
+static inline linkop_wind_handler_t linkop_wind_dispatch(glusterfs_fop_t fop)
+{
+ switch(fop){
+ case GF_FOP_LINK:
+ return link_wind;
+ case GF_FOP_UNLINK:
+ return unlink_wind;
+ case GF_FOP_RENAME:
+ return rename_wind;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+ return NULL;
+ }
+}
+
+static inline linkop_unwind_handler_t linkop_unwind_dispatch(glusterfs_fop_t fop)
+{
+ switch(fop){
+ case GF_FOP_LINK:
+ return link_unwind;
+ case GF_FOP_UNLINK:
+ return unlink_unwind;
+ case GF_FOP_RENAME:
+ return rename_unwind;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+ return NULL;
+ }
+}
+
+static inline mtd_op_t linkop_mtdop_dispatch(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_LINK:
+ return MTD_APPEND;
+ case GF_FOP_UNLINK:
+ return MTD_CUT;
+ case GF_FOP_RENAME:
+ return MTD_OVERWRITE;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", fop);
+ return MTD_LAST_OP;
+ }
+}
+
+#endif /* __CRYPT_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c
new file mode 100644
index 000000000..762fa554a
--- /dev/null
+++ b/xlators/encryption/crypt/src/data.c
@@ -0,0 +1,769 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+static void set_iv_aes_xts(off_t offset, struct object_cipher_info *object)
+{
+ unsigned char *ivec;
+
+ ivec = object->u.aes_xts.ivec;
+
+ /* convert the tweak into a little-endian byte
+ * array (IEEE P1619/D16, May 2007, section 5.1)
+ */
+
+ *((uint64_t *)ivec) = htole64(offset);
+
+ /* ivec is padded with zeroes */
+}
+
+static int32_t aes_set_keys_common(unsigned char *raw_key, uint32_t key_size,
+ AES_KEY *keys)
+{
+ int32_t ret;
+
+ ret = AES_set_encrypt_key(raw_key,
+ key_size,
+ &keys[AES_ENCRYPT]);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed");
+ return ret;
+ }
+ ret = AES_set_decrypt_key(raw_key,
+ key_size,
+ &keys[AES_DECRYPT]);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed");
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * set private cipher info for xts mode
+ */
+static int32_t set_private_aes_xts(struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int ret;
+ struct object_cipher_info *object = get_object_cinfo(info);
+ unsigned char *data_key;
+ uint32_t subkey_size;
+
+ /* init tweak value */
+ memset(object->u.aes_xts.ivec, 0, 16);
+
+ data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key);
+ if (!data_key)
+ return ENOMEM;
+
+ /*
+ * retrieve data keying meterial
+ */
+ ret = get_data_file_key(info, master, object->o_dkey_size, data_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key");
+ GF_FREE(data_key);
+ return ret;
+ }
+ /*
+ * parse compound xts key
+ */
+ subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */
+ /*
+ * install key for data encryption
+ */
+ ret = aes_set_keys_common(data_key,
+ subkey_size << 3, object->u.aes_xts.dkey);
+ if (ret) {
+ GF_FREE(data_key);
+ return ret;
+ }
+ /*
+ * set up key used to encrypt tweaks
+ */
+ ret = AES_set_encrypt_key(data_key + subkey_size,
+ object->o_dkey_size / 2,
+ &object->u.aes_xts.tkey);
+ if (ret < 0)
+ gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed");
+
+ GF_FREE(data_key);
+ return ret;
+}
+
+static int32_t aes_xts_init(void)
+{
+ cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS));
+ return 0;
+}
+
+static int32_t check_key_aes_xts(uint32_t keysize)
+{
+ switch(keysize) {
+ case 256:
+ case 512:
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+static int32_t encrypt_aes_xts(const unsigned char *from,
+ unsigned char *to, size_t length,
+ off_t offset, const int enc,
+ struct object_cipher_info *object)
+{
+ XTS128_CONTEXT ctx;
+ if (enc) {
+ ctx.key1 = &object->u.aes_xts.dkey[AES_ENCRYPT];
+ ctx.block1 = (block128_f)AES_encrypt;
+ }
+ else {
+ ctx.key1 = &object->u.aes_xts.dkey[AES_DECRYPT];
+ ctx.block1 = (block128_f)AES_decrypt;
+ }
+ ctx.key2 = &object->u.aes_xts.tkey;
+ ctx.block2 = (block128_f)AES_encrypt;
+
+ return CRYPTO_xts128_encrypt(&ctx,
+ object->u.aes_xts.ivec,
+ from,
+ to,
+ length, enc);
+}
+
+/*
+ * Cipher input chunk @from of length @len;
+ * @to: result of cipher transform;
+ * @off: offset in a file (must be cblock-aligned);
+ */
+static void cipher_data(struct object_cipher_info *object,
+ char *from,
+ char *to,
+ off_t off,
+ size_t len,
+ const int enc)
+{
+ crypt_check_input_len(len, object);
+
+#if TRIVIAL_TFM && DEBUG_CRYPT
+ return;
+#endif
+ data_cipher_algs[object->o_alg][object->o_mode].set_iv(off, object);
+ data_cipher_algs[object->o_alg][object->o_mode].encrypt
+ ((const unsigned char *)from,
+ (unsigned char *)to,
+ len,
+ off,
+ enc,
+ object);
+}
+
+#define MAX_CIPHER_CHUNK (1 << 30)
+
+/*
+ * Do cipher (encryption/decryption) transform of a
+ * continuous region of memory.
+ *
+ * @len: a number of bytes to transform;
+ * @buf: data to transform;
+ * @off: offset in a file, should be block-aligned
+ * for atomic cipher modes and ksize-aligned
+ * for other modes).
+ * @dir: direction of transform (encrypt/decrypt).
+ */
+static void cipher_region(struct object_cipher_info *object,
+ char *from,
+ char *to,
+ off_t off,
+ size_t len,
+ int dir)
+{
+ while (len > 0) {
+ size_t to_cipher;
+
+ to_cipher = len;
+ if (to_cipher > MAX_CIPHER_CHUNK)
+ to_cipher = MAX_CIPHER_CHUNK;
+
+ /* this will reset IV */
+ cipher_data(object,
+ from,
+ to,
+ off,
+ to_cipher,
+ dir);
+ from += to_cipher;
+ to += to_cipher;
+ off += to_cipher;
+ len -= to_cipher;
+ }
+}
+
+/*
+ * Do cipher transform (encryption/decryption) of
+ * plaintext/ciphertext represented by @vec.
+ *
+ * Pre-conditions: @vec represents a continuous piece
+ * of data in a file at offset @off to be ciphered
+ * (encrypted/decrypted).
+ * @count is the number of vec's components. All the
+ * components must be block-aligned, the caller is
+ * responsible for this. @dir is "direction" of
+ * transform (encrypt/decrypt).
+ */
+static void cipher_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off,
+ int32_t dir)
+{
+ int i;
+ int len = 0;
+
+ for (i = 0; i < count; i++) {
+ cipher_region(object,
+ vec[i].iov_base,
+ vec[i].iov_base,
+ off + len,
+ vec[i].iov_len,
+ dir);
+ len += vec[i].iov_len;
+ }
+}
+
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off)
+{
+ cipher_aligned_iov(object, vec, count, off, 1);
+}
+
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off)
+{
+ cipher_aligned_iov(object, vec, count, off, 0);
+}
+
+#if DEBUG_CRYPT
+static void compound_stream(struct iovec *vec, int count, char *buf, off_t skip)
+{
+ int i;
+ int off = 0;
+ for (i = 0; i < count; i++) {
+ memcpy(buf + off,
+ vec[i].iov_base + skip,
+ vec[i].iov_len - skip);
+
+ off += (vec[i].iov_len - skip);
+ skip = 0;
+ }
+}
+
+static void check_iovecs(struct iovec *vec, int cnt,
+ struct iovec *avec, int acnt, uint32_t off_in_head)
+{
+ char *s1, *s2;
+ uint32_t size, asize;
+
+ size = iovec_get_size(vec, cnt);
+ asize = iovec_get_size(avec, acnt) - off_in_head;
+ if (size != asize) {
+ gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d",
+ size, asize);
+ return;
+ }
+ s1 = GF_CALLOC(1, size, gf_crypt_mt_data);
+ if (!s1) {
+ gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+ return;
+ }
+ s2 = GF_CALLOC(1, asize, gf_crypt_mt_data);
+ if (!s2) {
+ GF_FREE(s1);
+ gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+ return;
+ }
+ compound_stream(vec, cnt, s1, 0);
+ compound_stream(avec, acnt, s2, off_in_head);
+ if (memcmp(s1, s2, size))
+ gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
+ GF_FREE(s1);
+ GF_FREE(s2);
+}
+
+#else
+#define check_iovecs(vec, count, avec, avecn, off) noop
+#endif /* DEBUG_CRYPT */
+
+static char *data_alloc_block(xlator_t *this, crypt_local_t *local,
+ int32_t block_size)
+{
+ struct iobuf *iobuf = NULL;
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, block_size);
+ if (!iobuf) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Failed to get iobuf");
+ return NULL;
+ }
+ if (!local->iobref_data) {
+ local->iobref_data = iobref_new();
+ if (!local->iobref_data) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Failed to get iobref");
+ iobuf_unref(iobuf);
+ return NULL;
+ }
+ }
+ iobref_add(local->iobref_data, iobuf);
+ return iobuf->ptr;
+}
+
+/*
+ * Compound @avec, which represent the same data
+ * chunk as @vec, but has aligned components of
+ * specified block size. Alloc blocks, if needed.
+ * In particular, incomplete head and tail blocks
+ * must be allocated.
+ * Put number of allocated blocks to @num_blocks.
+ *
+ * Example:
+ *
+ * input: data chunk represented by 4 components
+ * [AB],[BC],[CD],[DE];
+ * output: 5 logical blocks (0, 1, 2, 3, 4).
+ *
+ * A B C D E
+ * *-----*+------*-+---*----+--------+-*
+ * | || | | | | | |
+ * *-+-----+*------+-*---+----*--------*-+------*
+ * 0 1 2 3 4
+ *
+ * 0 - incomplete compound (head);
+ * 1, 2 - full compound;
+ * 3 - full non-compound (the case of reuse);
+ * 4 - incomplete non-compound (tail).
+ */
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf)
+{
+ int vecn = 0; /* number of the current component in vec */
+ int avecn = 0; /* number of the current component in avec */
+ off_t vec_off = 0; /* offset in the current vec component,
+ * i.e. the number of bytes have already
+ * been copied */
+ int32_t block_size = get_atom_size(object);
+ size_t to_process; /* number of vec's bytes to copy and(or) re-use */
+ int32_t off_in_head = conf->off_in_head;
+
+ to_process = iovec_get_size(vec, count);
+
+ while (to_process > 0) {
+ if (off_in_head ||
+ vec[vecn].iov_len - vec_off < block_size) {
+ /*
+ * less than block_size:
+ * the case of incomplete (head or tail),
+ * or compound block
+ */
+ size_t copied = 0;
+ /*
+ * populate the pool with a new block
+ */
+ blocks[*blocks_allocated] = data_alloc_block(this,
+ local,
+ block_size);
+ if (!blocks[*blocks_allocated])
+ return -ENOMEM;
+ memset(blocks[*blocks_allocated], 0, off_in_head);
+ /*
+ * fill the block with vec components
+ */
+ do {
+ size_t to_copy;
+
+ to_copy = vec[vecn].iov_len - vec_off;
+ if (to_copy > block_size - off_in_head)
+ to_copy = block_size - off_in_head;
+
+ memcpy(blocks[*blocks_allocated] + off_in_head + copied,
+ vec[vecn].iov_base + vec_off,
+ to_copy);
+
+ copied += to_copy;
+ to_process -= to_copy;
+
+ vec_off += to_copy;
+ if (vec_off == vec[vecn].iov_len) {
+ /* finished with this vecn */
+ vec_off = 0;
+ vecn++;
+ }
+ } while (copied < (block_size - off_in_head) && to_process > 0);
+ /*
+ * update avec
+ */
+ avec[avecn].iov_len = off_in_head + copied;
+ avec[avecn].iov_base = blocks[*blocks_allocated];
+
+ (*blocks_allocated)++;
+ off_in_head = 0;
+ } else {
+ /*
+ * the rest of the current vec component
+ * is not less than block_size, so reuse
+ * the memory buffer of the component.
+ */
+ size_t to_reuse;
+ to_reuse = (to_process > block_size ?
+ block_size :
+ to_process);
+ avec[avecn].iov_len = to_reuse;
+ avec[avecn].iov_base = vec[vecn].iov_base + vec_off;
+
+ vec_off += to_reuse;
+ if (vec_off == vec[vecn].iov_len) {
+ /* finished with this vecn */
+ vec_off = 0;
+ vecn++;
+ }
+ to_process -= to_reuse;
+ }
+ avecn++;
+ }
+ check_iovecs(vec, count, avec, avecn, conf->off_in_head);
+ return 0;
+}
+
+/*
+ * allocate and setup aligned vector for data submission
+ * Pre-condition: @conf is set.
+ */
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count)
+{
+ int32_t ret = ENOMEM;
+ struct iovec *avec;
+ char **pool;
+ uint32_t blocks_in_pool = 0;
+
+ conf->type = DATA_ATOM;
+
+ avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec);
+ if (!avec)
+ return ret;
+ pool = GF_CALLOC(conf->acount, sizeof(pool), gf_crypt_mt_char);
+ if (!pool) {
+ GF_FREE(avec);
+ return ret;
+ }
+ if (!vec) {
+ /*
+ * degenerated case: no data
+ */
+ pool[0] = data_alloc_block(this, local, get_atom_size(object));
+ if (!pool[0])
+ goto free;
+ blocks_in_pool = 1;
+ avec->iov_base = pool[0];
+ avec->iov_len = conf->off_in_tail;
+ }
+ else {
+ ret = align_iov_by_atoms(this, local, object, vec, vec_count,
+ avec, pool, &blocks_in_pool, conf);
+ if (ret)
+ goto free;
+ }
+ conf->avec = avec;
+ conf->pool = pool;
+ conf->blocks_in_pool = blocks_in_pool;
+ return 0;
+ free:
+ GF_FREE(avec);
+ GF_FREE(pool);
+ return ret;
+}
+
+/*
+ * allocate and setup aligned vector for hole submission
+ */
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop)
+{
+ uint32_t i, idx;
+ struct iovec *avec;
+ char **pool;
+ uint32_t num_blocks;
+ uint32_t blocks_in_pool = 0;
+
+ conf->type = HOLE_ATOM;
+
+ num_blocks = conf->acount -
+ (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0);
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ /*
+ * hole goes before data
+ */
+ if (num_blocks == 1 && conf->off_in_tail != 0)
+ /*
+ * we won't submit a hole which fits into
+ * a data atom: this part of hole will be
+ * submitted with data write
+ */
+ return 0;
+ break;
+ case GF_FOP_FTRUNCATE:
+ /*
+ * expanding truncate, hole goes after data,
+ * and will be submited in any case.
+ */
+ break;
+ default:
+ gf_log("crypt", GF_LOG_WARNING,
+ "bad file operation %d", fop);
+ return 0;
+ }
+ avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec);
+ if (!avec)
+ return ENOMEM;
+ pool = GF_CALLOC(num_blocks, sizeof(pool), gf_crypt_mt_char);
+ if (!pool) {
+ GF_FREE(avec);
+ return ENOMEM;
+ }
+ for (i = 0; i < num_blocks; i++) {
+ pool[i] = data_alloc_block(this, local, get_atom_size(object));
+ if (pool[i] == NULL)
+ goto free;
+ blocks_in_pool++;
+ }
+ if (has_head_block(conf)) {
+ /* set head block */
+ idx = 0;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ memset(avec[idx].iov_base + conf->off_in_head,
+ 0,
+ get_atom_size(object) - conf->off_in_head);
+ }
+ if (has_tail_block(conf)) {
+ /* set tail block */
+ idx = num_blocks - 1;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ memset(avec[idx].iov_base, 0, conf->off_in_tail);
+ }
+ if (has_full_blocks(conf)) {
+ /* set full block */
+ idx = conf->off_in_head ? 1 : 0;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ /*
+ * since we re-use the buffer,
+ * zeroes will be set every time
+ * before encryption, see submit_full()
+ */
+ }
+ conf->avec = avec;
+ conf->pool = pool;
+ conf->blocks_in_pool = blocks_in_pool;
+ return 0;
+ free:
+ GF_FREE(avec);
+ GF_FREE(pool);
+ return ENOMEM;
+}
+
+/* A helper for setting up config of partial atoms (which
+ * participate in read-modify-write sequence).
+ *
+ * Calculate and setup precise amount of "extra-bytes"
+ * that should be uptodated at the end of partial (not
+ * necessarily tail!) block.
+ *
+ * Pre-condition: local->old_file_size is valid!
+ * @conf contains setup, which is enough for correct calculation
+ * of has_tail_block(), ->get_offset().
+ */
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype)
+{
+ uint32_t to_block;
+ crypt_local_t *local = frame->local;
+ uint64_t old_file_size = local->old_file_size;
+ struct rmw_atom *partial = atom_by_types(dtype,
+ has_tail_block(conf) ?
+ TAIL_ATOM : HEAD_ATOM);
+
+ if (old_file_size <= partial->offset_at(frame, object))
+ to_block = 0;
+ else {
+ to_block = old_file_size - partial->offset_at(frame, object);
+ if (to_block > get_atom_size(object))
+ to_block = get_atom_size(object);
+ }
+ if (to_block > conf->off_in_tail)
+ conf->gap_in_tail = to_block - conf->off_in_tail;
+ else
+ /*
+ * nothing to uptodate
+ */
+ conf->gap_in_tail = 0;
+}
+
+/*
+ * fill struct avec_config with offsets layouts
+ */
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t set_gap)
+{
+ crypt_local_t *local;
+ struct object_cipher_info *object;
+ struct avec_config *conf;
+ uint32_t resid;
+
+ uint32_t atom_size;
+ uint32_t atom_bits;
+
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head = 0;
+ uint32_t off_in_tail = 0;
+ uint32_t nr_full_blocks;
+ int32_t size_full_blocks;
+
+ uint32_t acount; /* number of alifned components to write.
+ * The same as number of occupied logical
+ * blocks (atoms)
+ */
+ local = frame->local;
+ object = &local->info->cinfo;
+ conf = (dtype == DATA_ATOM ?
+ get_data_conf(frame) : get_hole_conf(frame));
+
+ orig_offset = offset;
+ orig_size = count;
+
+ atom_size = get_atom_size(object);
+ atom_bits = get_atom_bits(object);
+
+ /*
+ * Round-down the start,
+ * round-up the end.
+ */
+ resid = offset & (uint64_t)(atom_size - 1);
+
+ if (resid)
+ off_in_head = resid;
+ aligned_offset = offset - off_in_head;
+ expanded_size = orig_size + off_in_head;
+
+ /* calculate tail,
+ expand size forward */
+ resid = (offset + orig_size) & (uint64_t)(atom_size - 1);
+
+ if (resid) {
+ off_in_tail = resid;
+ expanded_size += (atom_size - off_in_tail);
+ }
+ /*
+ * calculate number of occupied blocks
+ */
+ acount = expanded_size >> atom_bits;
+ /*
+ * calculate number of full blocks
+ */
+ size_full_blocks = expanded_size;
+ if (off_in_head)
+ size_full_blocks -= atom_size;
+ if (off_in_tail && size_full_blocks > 0)
+ size_full_blocks -= atom_size;
+ nr_full_blocks = size_full_blocks >> atom_bits;
+
+ conf->atom_size = atom_size;
+ conf->orig_size = orig_size;
+ conf->orig_offset = orig_offset;
+ conf->expanded_size = expanded_size;
+ conf->aligned_offset = aligned_offset;
+
+ conf->off_in_head = off_in_head;
+ conf->off_in_tail = off_in_tail;
+ conf->nr_full_blocks = nr_full_blocks;
+ conf->acount = acount;
+ /*
+ * Finally, calculate precise amount of
+ * "extra-bytes" that should be uptodated
+ * at the end.
+ * Only if RMW is expected.
+ */
+ if (off_in_tail && set_gap)
+ set_gap_at_end(frame, object, conf, dtype);
+}
+
+struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = {
+ [AES_CIPHER_ALG][XTS_CIPHER_MODE] =
+ { .atomic = _gf_true,
+ .should_pad = _gf_true,
+ .blkbits = AES_BLOCK_BITS,
+ .init = aes_xts_init,
+ .set_private = set_private_aes_xts,
+ .check_key = check_key_aes_xts,
+ .set_iv = set_iv_aes_xts,
+ .encrypt = encrypt_aes_xts
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c
new file mode 100644
index 000000000..4a1d3bb5a
--- /dev/null
+++ b/xlators/encryption/crypt/src/keys.c
@@ -0,0 +1,302 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/* Key hierarchy
+
+ +----------------+
+ | MASTER_VOL_KEY |
+ +-------+--------+
+ |
+ |
+ +----------------+----------------+
+ | | |
+ | | |
+ +-------+------+ +-------+-------+ +------+--------+
+ | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY |
+ +-------+------+ +---------------+ +---------------+
+ |
+ |
+ +-------+-------+
+ | NMTD_LINK_KEY |
+ +---------------+
+
+ */
+
+#if DEBUG_CRYPT
+static void check_prf_iters(uint32_t num_iters)
+{
+ if (num_iters == 0)
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad number of prf iterations : %d", num_iters);
+}
+#else
+#define check_prf_iters(num_iters) noop
+#endif /* DEBUG_CRYPT */
+
+unsigned char crypt_fake_oid[16] =
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+/*
+ * derive key in the counter mode using
+ * sha256-based HMAC as PRF, see
+ * NIST Special Publication 800-108, 5.1)
+ */
+
+#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH
+
+static int32_t kderive_init(struct kderive_context *ctx,
+ const unsigned char *pkey, /* parent key */
+ uint32_t pkey_size, /* parent key size */
+ const unsigned char *idctx, /* id-context */
+ uint32_t idctx_size,
+ crypt_key_type type /* type of child key */)
+{
+ unsigned char *pos;
+ uint32_t llen = strlen(crypt_keys[type].label);
+ /*
+ * Compoud the fixed input data for KDF:
+ * [i]_2 || Label || 0x00 || Id-Context || [L]_2),
+ * NIST SP 800-108, 5.1
+ */
+ ctx->fid_len =
+ sizeof(uint32_t) +
+ llen +
+ 1 +
+ idctx_size +
+ sizeof(uint32_t);
+
+ ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key);
+ if (!ctx->fid)
+ return ENOMEM;
+ ctx->out_len = round_up(crypt_keys[type].len >> 3,
+ PRF_OUTPUT_SIZE);
+ ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key);
+ if (!ctx->out) {
+ GF_FREE(ctx->fid);
+ return ENOMEM;
+ }
+ ctx->pkey = pkey;
+ ctx->pkey_len = pkey_size;
+ ctx->ckey_len = crypt_keys[type].len;
+
+ pos = ctx->fid;
+
+ /* counter will be set up in kderive_rfn() */
+ pos += sizeof(uint32_t);
+
+ memcpy(pos, crypt_keys[type].label, llen);
+ pos += llen;
+
+ /* set up zero octet */
+ *pos = 0;
+ pos += 1;
+
+ memcpy(pos, idctx, idctx_size);
+ pos += idctx_size;
+
+ *((uint32_t *)pos) = htobe32(ctx->ckey_len);
+
+ return 0;
+}
+
+static void kderive_update(struct kderive_context *ctx)
+{
+ uint32_t i;
+ HMAC_CTX hctx;
+ unsigned char *pos = ctx->out;
+ uint32_t *p_iter = (uint32_t *)ctx->fid;
+ uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE;
+
+ check_prf_iters(num_iters);
+
+ HMAC_CTX_init(&hctx);
+ for (i = 0; i < num_iters; i++) {
+ /*
+ * update the iteration number in the fid
+ */
+ *p_iter = htobe32(i);
+ HMAC_Init_ex(&hctx,
+ ctx->pkey, ctx->pkey_len >> 3,
+ EVP_sha256(),
+ NULL);
+ HMAC_Update(&hctx, ctx->fid, ctx->fid_len);
+ HMAC_Final(&hctx, pos, NULL);
+
+ pos += PRF_OUTPUT_SIZE;
+ }
+ HMAC_CTX_cleanup(&hctx);
+}
+
+static void kderive_final(struct kderive_context *ctx, unsigned char *child)
+{
+ memcpy(child, ctx->out, ctx->ckey_len >> 3);
+ GF_FREE(ctx->fid);
+ GF_FREE(ctx->out);
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+/*
+ * derive per-volume key for object ids aithentication
+ */
+int32_t get_nmtd_vol_key(struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ crypt_fake_oid, sizeof(uuid_t), NMTD_VOL_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, master->m_nmtd_key);
+ return 0;
+}
+
+/*
+ * derive per-link key for aithentication of non-encrypted
+ * meta-data (nmtd)
+ */
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_nmtd_key,
+ nmtd_vol_key_size(),
+ (const unsigned char *)loc->path,
+ strlen(loc->path), NMTD_LINK_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, result);
+ return 0;
+}
+
+/*
+ * derive per-file key for encryption and authentication
+ * of encrypted part of metadata (emtd)
+ */
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ info->oid, sizeof(uuid_t), EMTD_FILE_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, result);
+ return 0;
+}
+
+static int32_t data_key_type_by_size(uint32_t keysize, crypt_key_type *type)
+{
+ int32_t ret = 0;
+ switch (keysize) {
+ case 256:
+ *type = DATA_FILE_KEY_256;
+ break;
+ case 512:
+ *type = DATA_FILE_KEY_512;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d",
+ keysize);
+ ret = ENOTSUP;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * derive per-file key for data encryption
+ */
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+ crypt_key_type type;
+
+ ret = data_key_type_by_size(keysize, &type);
+ if (ret)
+ return ret;
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ info->oid, sizeof(uuid_t), type);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, key);
+ return 0;
+}
+
+/*
+ * NOTE: Don't change existing keys: it will break compatibility;
+ */
+struct crypt_key crypt_keys[LAST_KEY_TYPE] = {
+ [MASTER_VOL_KEY] =
+ { .len = MASTER_VOL_KEY_SIZE << 3,
+ .label = "volume-master",
+ },
+ [NMTD_VOL_KEY] =
+ { .len = NMTD_VOL_KEY_SIZE << 3,
+ .label = "volume-nmtd-key-generation"
+ },
+ [NMTD_LINK_KEY] =
+ { .len = 128,
+ .label = "link-nmtd-authentication"
+ },
+ [EMTD_FILE_KEY] =
+ { .len = 128,
+ .label = "file-emtd-encryption-and-auth"
+ },
+ [DATA_FILE_KEY_256] =
+ { .len = 256,
+ .label = "file-data-encryption-256"
+ },
+ [DATA_FILE_KEY_512] =
+ { .len = 512,
+ .label = "file-data-encryption-512"
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c
new file mode 100644
index 000000000..36b14c055
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.c
@@ -0,0 +1,605 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+#include "metadata.h"
+
+int32_t alloc_format(crypt_local_t *local, size_t size)
+{
+ if (size > 0) {
+ local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd);
+ if (!local->format)
+ return ENOMEM;
+ }
+ local->format_size = size;
+ return 0;
+}
+
+int32_t alloc_format_create(crypt_local_t *local)
+{
+ return alloc_format(local, new_format_size());
+}
+
+void free_format(crypt_local_t *local)
+{
+ GF_FREE(local->format);
+}
+
+/*
+ * Check compatibility with extracted metadata
+ */
+static int32_t check_file_metadata(struct crypt_inode_info *info)
+{
+ struct object_cipher_info *object = &info->cinfo;
+
+ if (info->nr_minor != CRYPT_XLATOR_ID) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported minor subversion %d", info->nr_minor);
+ return EINVAL;
+ }
+ if (object->o_alg > LAST_CIPHER_ALG) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported cipher algorithm %d",
+ object->o_alg);
+ return EINVAL;
+ }
+ if (object->o_mode > LAST_CIPHER_MODE) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported cipher mode %d",
+ object->o_mode);
+ return EINVAL;
+ }
+ if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS ||
+ object->o_block_bits > CRYPT_MAX_BLOCK_BITS) {
+ gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d",
+ object->o_block_bits);
+ return EINVAL;
+ }
+ /* TBD: check data key size */
+ return 0;
+}
+
+static size_t format_size_v1(mtd_op_t op, size_t old_size)
+{
+
+ switch (op) {
+ case MTD_CREATE:
+ return sizeof(struct mtd_format_v1);
+ case MTD_OVERWRITE:
+ return old_size;
+ case MTD_APPEND:
+ return old_size + NMTD_8_MAC_SIZE;
+ case MTD_CUT:
+ if (old_size > sizeof(struct mtd_format_v1))
+ return old_size - NMTD_8_MAC_SIZE;
+ else
+ return 0;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation");
+ return 0;
+ }
+}
+
+/*
+ * Calculate size of the updated format string.
+ * Returned zero means that we don't need to update the format string.
+ */
+size_t format_size(mtd_op_t op, size_t old_size)
+{
+ size_t versioned;
+
+ versioned = mtd_loaders[current_mtd_loader()].format_size(op,
+ old_size - sizeof(struct crypt_format));
+ if (versioned != 0)
+ return versioned + sizeof(struct crypt_format);
+ return 0;
+}
+
+/*
+ * size of the format string of newly created file (nr_links = 1)
+ */
+size_t new_format_size(void)
+{
+ return format_size(MTD_CREATE, 0);
+}
+
+/*
+ * Calculate per-link MAC by pathname
+ */
+static int32_t calc_link_mac_v1(struct mtd_format_v1 *fmt,
+ loc_t *loc,
+ unsigned char *result,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char nmtd_link_key[16];
+ CMAC_CTX *cctx;
+ size_t len;
+
+ ret = get_nmtd_link_key(loc, master, nmtd_link_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key");
+ return -1;
+ }
+ cctx = CMAC_CTX_new();
+ if (!cctx) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed");
+ return -1;
+ }
+ ret = CMAC_Init(cctx, nmtd_link_key, sizeof(nmtd_link_key),
+ EVP_aes_128_cbc(), 0);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed");
+ CMAC_CTX_free(cctx);
+ return -1;
+ }
+ ret = CMAC_Update(cctx, get_NMTD_V1(info), SIZE_OF_NMTD_V1);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed");
+ CMAC_CTX_free(cctx);
+ return -1;
+ }
+ ret = CMAC_Final(cctx, result, &len);
+ CMAC_CTX_free(cctx);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Create per-link MAC of index @idx by pathname
+ */
+static int32_t create_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char *mac;
+ unsigned char cmac[16];
+
+ mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+ ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
+ if (ret)
+ return -1;
+ memcpy(mac, cmac, SIZE_OF_NMTD_V1_MAC);
+ return 0;
+}
+
+static int32_t create_format_v1(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct mtd_format_v1 *fmt;
+ unsigned char mtd_key[16];
+ AES_KEY EMTD_KEY;
+ unsigned char nmtd_link_key[16];
+ uint32_t ad;
+ GCM128_CONTEXT *gctx;
+
+ fmt = (struct mtd_format_v1 *)wire;
+
+ fmt->minor_id = info->nr_minor;
+ fmt->alg_id = AES_CIPHER_ALG;
+ fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS;
+ fmt->block_bits = master->m_block_bits;
+ fmt->mode_id = master->m_mode;
+ /*
+ * retrieve keys for the parts of metadata
+ */
+ ret = get_emtd_file_key(info, master, mtd_key);
+ if (ret)
+ return ret;
+ ret = get_nmtd_link_key(loc, master, nmtd_link_key);
+ if (ret)
+ return ret;
+
+ AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+
+ gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+
+ /* TBD: Check return values */
+
+ CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+ ad = htole32(MTD_LOADER_V1);
+ ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ ret = CRYPTO_gcm128_encrypt(gctx,
+ get_EMTD_V1(fmt),
+ get_EMTD_V1(fmt),
+ SIZE_OF_EMTD_V1);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ /*
+ * set MAC of encrypted part of metadata
+ */
+ CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC);
+ CRYPTO_gcm128_release(gctx);
+ /*
+ * set the first MAC of non-encrypted part of metadata
+ */
+ return create_link_mac_v1(fmt, 0, loc, info, master);
+}
+
+/*
+ * Called by fops:
+ * ->create();
+ * ->link();
+ *
+ * Pack common and version-specific parts of file's metadata
+ * Pre-conditions: @info contains valid object-id.
+ */
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ struct crypt_format *fmt = (struct crypt_format *)wire;
+
+ fmt->loader_id = current_mtd_loader();
+
+ wire += sizeof(struct crypt_format);
+ return mtd_loaders[current_mtd_loader()].create_format(wire, loc,
+ info, master);
+}
+
+/*
+ * Append or overwrite per-link mac of @mac_idx index
+ * in accordance with the new pathname
+ */
+int32_t appov_link_mac_v1(unsigned char *new,
+ unsigned char *old,
+ uint32_t old_size,
+ int32_t mac_idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ memcpy(new, old, old_size);
+ return create_link_mac_v1((struct mtd_format_v1 *)new, mac_idx,
+ loc, info, master);
+}
+
+/*
+ * Cut per-link mac of @mac_idx index
+ */
+static int32_t cut_link_mac_v1(unsigned char *new,
+ unsigned char *old,
+ uint32_t old_size,
+ int32_t mac_idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ memcpy(new,
+ old,
+ sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1));
+
+ memcpy(new + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1),
+ old + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx,
+ old_size - (sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx));
+ return 0;
+}
+
+int32_t update_format_v1(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx, /* of old name */
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ switch (op) {
+ case MTD_APPEND:
+ mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1))/8;
+ case MTD_OVERWRITE:
+ return appov_link_mac_v1(new, old, old_len, mac_idx,
+ loc, info, master, local);
+ case MTD_CUT:
+ return cut_link_mac_v1(new, old, old_len, mac_idx,
+ loc, info, master, local);
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op);
+ return -1;
+ }
+}
+
+/*
+ * Called by fops:
+ *
+ * ->link()
+ * ->unlink()
+ * ->rename()
+ *
+ */
+int32_t update_format(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ if (!new)
+ return 0;
+ memcpy(new, old, sizeof(struct crypt_format));
+
+ old += sizeof(struct crypt_format);
+ new += sizeof(struct crypt_format);
+ old_len -= sizeof(struct crypt_format);
+
+ return mtd_loaders[current_mtd_loader()].update_format(new, old,
+ old_len,
+ mac_idx, op,
+ loc, info,
+ master, local);
+}
+
+/*
+ * Perform preliminary checks of found metadata
+ * Return < 0 on errors;
+ * Return number of object-id MACs (>= 1) on success
+ */
+int32_t check_format_v1(uint32_t len, unsigned char *wire)
+{
+ uint32_t nr_links;
+
+ if (len < sizeof(struct mtd_format_v1)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "v1-loader: bad metadata size %d", len);
+ goto error;
+ }
+ len -= sizeof(struct mtd_format_v1);
+ if (len % sizeof(nmtd_8_mac_t)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "v1-loader: bad metadata format");
+ goto error;
+ }
+ nr_links = 1 + len / sizeof(nmtd_8_mac_t);
+ if (nr_links > _POSIX_LINK_MAX)
+ goto error;
+ return nr_links;
+ error:
+ return EIO;
+}
+
+/*
+ * Verify per-link MAC specified by index @idx
+ *
+ * return:
+ * -1 on errors;
+ * 0 on failed verification;
+ * 1 on sucessful verification
+ */
+static int32_t verify_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t idx /* index of the mac to verify */,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char *mac;
+ unsigned char cmac[16];
+
+ mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+ ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
+ if (ret)
+ return -1;
+ if (memcmp(cmac, mac, SIZE_OF_NMTD_V1_MAC))
+ return 0;
+ return 1;
+}
+
+/*
+ * Lookup per-link MAC by pathname.
+ *
+ * return index of the MAC, if it was found;
+ * return < 0 on errors, or if the MAC wasn't found
+ */
+static int32_t lookup_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t nr_macs,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ uint32_t idx;
+
+ for (idx = 0; idx < nr_macs; idx++) {
+ ret = verify_link_mac_v1(fmt, idx, loc, info, master);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return idx;
+ }
+ return -ENOENT;
+}
+
+/*
+ * Extract version-specific part of metadata
+ */
+static int32_t open_format_v1(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info)
+{
+ int32_t ret;
+ int32_t num_nmtd_macs;
+ struct mtd_format_v1 *fmt;
+ unsigned char mtd_key[16];
+ AES_KEY EMTD_KEY;
+ GCM128_CONTEXT *gctx;
+ uint32_t ad;
+ emtd_8_mac_t gmac;
+ struct object_cipher_info *object;
+
+ num_nmtd_macs = check_format_v1(len, wire);
+ if (num_nmtd_macs <= 0)
+ return EIO;
+ fmt = (struct mtd_format_v1 *)wire;
+
+ ret = lookup_link_mac_v1(fmt, num_nmtd_macs, loc, info, master);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed");
+ return EINVAL;
+ }
+ local->mac_idx = ret;
+ if (load_info == _gf_false)
+ /* the case of partial open */
+ return 0;
+
+ object = &info->cinfo;
+
+ ret = get_emtd_file_key(info, master, mtd_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key");
+ return ret;
+ }
+ /*
+ * decrypt encrypted meta-data
+ */
+ ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
+ return ret;
+ }
+ gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+ if (!gctx) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
+ return ENOMEM;
+ }
+ CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+ ad = htole32(MTD_LOADER_V1);
+ ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ ret = CRYPTO_gcm128_decrypt(gctx,
+ get_EMTD_V1(fmt),
+ get_EMTD_V1(fmt),
+ SIZE_OF_EMTD_V1);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ /*
+ * verify metadata
+ */
+ CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac));
+ CRYPTO_gcm128_release(gctx);
+ if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) {
+ gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed");
+ return EINVAL;
+ }
+ /*
+ * load verified metadata to the private part of inode
+ */
+ info->nr_minor = fmt->minor_id;
+
+ object->o_alg = fmt->alg_id;
+ object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS;
+ object->o_block_bits = fmt->block_bits;
+ object->o_mode = fmt->mode_id;
+
+ return check_file_metadata(info);
+}
+
+/*
+ * perform metadata authentication against @loc->path;
+ * extract crypt-specific attribtes and populate @info
+ * with them (optional)
+ */
+int32_t open_format(unsigned char *str,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info)
+{
+ struct crypt_format *fmt;
+ if (len < sizeof(*fmt)) {
+ gf_log("crypt", GF_LOG_ERROR, "Bad core format");
+ return EIO;
+ }
+ fmt = (struct crypt_format *)str;
+
+ if (fmt->loader_id >= LAST_MTD_LOADER) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Unsupported loader id %d", fmt->loader_id);
+ return EINVAL;
+ }
+ str += sizeof(*fmt);
+ len -= sizeof(*fmt);
+
+ return mtd_loaders[fmt->loader_id].open_format(str,
+ len,
+ loc,
+ info,
+ master,
+ local,
+ load_info);
+}
+
+struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER] = {
+ [MTD_LOADER_V1] =
+ {.format_size = format_size_v1,
+ .create_format = create_format_v1,
+ .open_format = open_format_v1,
+ .update_format = update_format_v1
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h
new file mode 100644
index 000000000..a92f149ef
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __METADATA_H__
+#define __METADATA_H__
+
+#define NMTD_8_MAC_SIZE (8)
+#define EMTD_8_MAC_SIZE (8)
+
+typedef uint8_t nmtd_8_mac_t[NMTD_8_MAC_SIZE];
+typedef uint8_t emtd_8_mac_t[EMTD_8_MAC_SIZE] ;
+
+/*
+ * Version "v1" of file's metadata.
+ * Metadata of this version has 4 components:
+ *
+ * 1) EMTD (Encrypted part of MeTaData);
+ * 2) NMTD (Non-encrypted part of MeTaData);
+ * 3) EMTD_MAC; (EMTD Message Authentication Code);
+ * 4) Array of per-link NMTD MACs (for every (hard)link it includes
+ * exactly one MAC)
+ */
+struct mtd_format_v1 {
+ /* EMTD, encrypted part of meta-data */
+ uint8_t alg_id; /* cipher algorithm id (only AES for now) */
+ uint8_t mode_id; /* cipher mode id; (only XTS for now) */
+ uint8_t block_bits; /* encoded block size */
+ uint8_t minor_id; /* client translator id */
+ uint8_t dkey_factor; /* encoded size of the data key */
+ /* MACs */
+ emtd_8_mac_t gmac; /* MAC of the encrypted meta-data, 8 bytes */
+ nmtd_8_mac_t omac; /* per-link MACs of the non-encrypted
+ * meta-data: at least one such MAC is always
+ * present */
+} __attribute__((packed));
+
+/*
+ * NMTD, the non-encrypted part of metadata of version "v1"
+ * is file's gfid, which is generated on trusted machines.
+ */
+#define SIZE_OF_NMTD_V1 (sizeof(uuid_t))
+#define SIZE_OF_EMTD_V1 (offsetof(struct mtd_format_v1, gmac) - \
+ offsetof(struct mtd_format_v1, alg_id))
+#define SIZE_OF_NMTD_V1_MAC (NMTD_8_MAC_SIZE)
+#define SIZE_OF_EMTD_V1_MAC (EMTD_8_MAC_SIZE)
+
+static inline unsigned char *get_EMTD_V1(struct mtd_format_v1 *format)
+{
+ return &format->alg_id;
+}
+
+static inline unsigned char *get_NMTD_V1(struct crypt_inode_info *info)
+{
+ return info->oid;
+}
+
+static inline unsigned char *get_EMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->gmac;
+}
+
+static inline unsigned char *get_NMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->omac;
+}
+
+#endif /* __METADATA_H__ */
diff --git a/xlators/encryption/rot-13/src/Makefile.am b/xlators/encryption/rot-13/src/Makefile.am
index ba5e623d8..94e8d18e7 100644
--- a/xlators/encryption/rot-13/src/Makefile.am
+++ b/xlators/encryption/rot-13/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = rot-13.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
-rot_13_la_LDFLAGS = -module -avoidversion
+rot_13_la_LDFLAGS = -module -avoid-version
rot_13_la_SOURCES = rot-13.c
rot_13_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = rot-13.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/encryption/rot-13/src/rot-13.c b/xlators/encryption/rot-13/src/rot-13.c
index 3cf925ed8..b9ac29a72 100644
--- a/xlators/encryption/rot-13/src/rot-13.c
+++ b/xlators/encryption/rot-13/src/rot-13.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <ctype.h>
#include <sys/uio.h>
@@ -32,13 +22,13 @@
#include "rot-13.h"
/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
+ * This is a rot13 ``encryption'' xlator. It rot13's data when
+ * writing to disk and rot13's it back when reading it.
* This xlator is meant as an example, NOT FOR PRODUCTION
* USE ;) (hence no error-checking)
*/
-void
+void
rot13 (char *buf, int len)
{
int i;
@@ -68,14 +58,15 @@ rot13_readv_cbk (call_frame_t *frame,
struct iovec *vector,
int32_t count,
struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
-
+
if (priv->decrypt_read)
rot13_iovec (vector, count);
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
return 0;
}
@@ -84,13 +75,13 @@ rot13_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame,
rot13_readv_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -101,9 +92,10 @@ rot13_writev_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -112,20 +104,20 @@ rot13_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
+ int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
if (priv->encrypt_write)
rot13_iovec (vector, count);
- STACK_WIND (frame,
+ STACK_WIND (frame,
rot13_writev_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset,
- iobref);
+ fd, vector, count, offset, flags,
+ iobref, xdata);
return 0;
}
@@ -136,7 +128,7 @@ init (xlator_t *this)
rot_13_private_t *priv = NULL;
if (!this->children || this->children->next) {
- gf_log ("rot13", GF_LOG_ERROR,
+ gf_log ("rot13", GF_LOG_ERROR,
"FATAL: rot13 should have exactly one child");
return -1;
}
@@ -194,15 +186,14 @@ struct xlator_fops fops = {
.writev = rot13_writev
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
- { .key = {"encrypt-write"},
+ { .key = {"encrypt-write"},
.type = GF_OPTION_TYPE_BOOL
},
- { .key = {"decrypt-read"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
},
{ .key = {NULL} },
};
diff --git a/xlators/encryption/rot-13/src/rot-13.h b/xlators/encryption/rot-13/src/rot-13.h
index 8ef8162ae..3e9fc19c7 100644
--- a/xlators/encryption/rot-13/src/rot-13.h
+++ b/xlators/encryption/rot-13/src/rot-13.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ROT_13_H__
#define __ROT_13_H__
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 809bbe510..d2f5ef192 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = locks trash quota read-only mac-compat quiesce marker#path-converter # filter
+SUBDIRS = locks quota read-only mac-compat quiesce marker index \
+ protect compress changelog gfid-access $(GLUPY_SUBDIR) qemu-block # trash path-converter # filter
CLEANFILES =
diff --git a/xlators/features/changelog/Makefile.am b/xlators/features/changelog/Makefile.am
new file mode 100644
index 000000000..153bb6850
--- /dev/null
+++ b/xlators/features/changelog/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src lib
+
+CLEANFILES =
diff --git a/xlators/storage/bdb/Makefile.am b/xlators/features/changelog/lib/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/xlators/storage/bdb/Makefile.am
+++ b/xlators/features/changelog/lib/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c
new file mode 100644
index 000000000..14562585a
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-changes.c
@@ -0,0 +1,87 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+#define handle_error(fn) \
+ printf ("%s (reason: %s)\n", fn, strerror (errno))
+
+int
+main (int argc, char ** argv)
+{
+ int i = 0;
+ int ret = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ char fbuf[PATH_MAX] = {0,};
+
+ /* get changes for brick "/home/vshankar/export/yow/yow-1" */
+ ret = gf_changelog_register ("/home/vshankar/export/yow/yow-1",
+ "/tmp/scratch", "/tmp/change.log", 9, 5);
+ if (ret) {
+ handle_error ("register failed");
+ goto out;
+ }
+
+ while (1) {
+ i = 0;
+ nr_changes = gf_changelog_scan ();
+ if (nr_changes < 0) {
+ handle_error ("scan(): ");
+ break;
+ }
+
+ if (nr_changes == 0)
+ goto next;
+
+ printf ("Got %ld changelog files\n", nr_changes);
+
+ while ( (changes =
+ gf_changelog_next_change (fbuf, PATH_MAX)) > 0) {
+ printf ("changelog file [%d]: %s\n", ++i, fbuf);
+
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_changelog_done (fbuf);
+ if (ret)
+ handle_error ("gf_changelog_done");
+ }
+
+ if (changes == -1)
+ handle_error ("gf_changelog_next_change");
+
+ next:
+ sleep (10);
+ }
+
+ out:
+ return ret;
+}
diff --git a/xlators/features/changelog/lib/examples/python/changes.py b/xlators/features/changelog/lib/examples/python/changes.py
new file mode 100644
index 000000000..d21db8eab
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/changes.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+
+import os
+import sys
+import time
+import libgfchangelog
+
+cl = libgfchangelog.Changes()
+
+def get_changes(brick, scratch_dir, log_file, log_level, interval):
+ change_list = []
+ try:
+ cl.cl_register(brick, scratch_dir, log_file, log_level)
+ while True:
+ cl.cl_scan()
+ change_list = cl.cl_getchanges()
+ if change_list:
+ print change_list
+ for change in change_list:
+ print('done with %s' % (change))
+ cl.cl_done(change)
+ time.sleep(interval)
+ except OSError:
+ ex = sys.exc_info()[1]
+ print ex
+
+if __name__ == '__main__':
+ if len(sys.argv) != 5:
+ print("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>"
+ % (sys.argv[0]))
+ sys.exit(1)
+ get_changes(sys.argv[1], sys.argv[2], sys.argv[3], 9, int(sys.argv[4]))
diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
new file mode 100644
index 000000000..68ec3baf1
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
@@ -0,0 +1,64 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+class Changes(object):
+ libgfc = CDLL(find_library("gfchangelog"), use_errno=True)
+
+ @classmethod
+ def geterrno(cls):
+ return get_errno()
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _get_api(cls, call):
+ return getattr(cls.libgfc, call)
+
+ @classmethod
+ def cl_register(cls, brick, path, log_file, log_level, retries = 0):
+ ret = cls._get_api('gf_changelog_register')(brick, path,
+ log_file, log_level, retries)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_scan(cls):
+ ret = cls._get_api('gf_changelog_scan')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_startfresh(cls):
+ ret = cls._get_api('gf_changelog_start_fresh')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_getchanges(cls):
+ """ remove hardcoding for path name length """
+ def clsort(f):
+ return f.split('.')[-1]
+ changes = []
+ buf = create_string_buffer('\0', 4096)
+ call = cls._get_api('gf_changelog_next_change')
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break;
+ changes.append(buf.raw[:ret-1])
+ if ret == -1:
+ cls.raise_oserr()
+ # cleanup tracker
+ cls.cl_startfresh()
+ return sorted(changes, key=clsort)
+
+ @classmethod
+ def cl_done(cls, clfile):
+ ret = cls._get_api('gf_changelog_done')(clfile)
+ if ret == -1:
+ cls.raise_oserr()
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
new file mode 100644
index 000000000..fbaaea628
--- /dev/null
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -0,0 +1,37 @@
+libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
+ -I../../../src/ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/features/changelog/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(GF_GLUSTERFS_LIBS)
+
+libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS)
+
+libgfchangelogdir = $(includedir)/glusterfs/gfchangelog
+lib_LTLIBRARIES = libgfchangelog.la
+
+CONTRIB_BUILDDIR = $(top_builddir)/contrib
+
+libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-process.c \
+ gf-changelog-helpers.c $(CONTRIBDIR)/uuid/clear.c \
+ $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c \
+ $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/parse.c \
+ $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c \
+ $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c \
+ $(CONTRIBDIR)/uuid/unpack.c
+
+noinst_HEADERS = gf-changelog-helpers.h $(CONTRIBDIR)/uuid/uuidd.h \
+ $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
+ $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+
+libgfchangelog_HEADERS = changelog.h
+
+CLEANFILES =
+CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/changelog/lib/src/changelog.h b/xlators/features/changelog/lib/src/changelog.h
new file mode 100644
index 000000000..5cddfb583
--- /dev/null
+++ b/xlators/features/changelog/lib/src/changelog.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+/* API set */
+
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+ char *log_file, int log_levl, int max_reconnects);
+ssize_t
+gf_changelog_scan ();
+
+int
+gf_changelog_start_fresh ();
+
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen);
+
+int
+gf_changelog_done (char *file);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
new file mode 100644
index 000000000..1eef8bf04
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
@@ -0,0 +1,180 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-mem-types.h"
+#include "gf-changelog-helpers.h"
+
+ssize_t gf_changelog_read_path (int fd, char *buffer, size_t bufsize)
+{
+ return read (fd, buffer, bufsize);
+}
+
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t writen = 0;
+
+ while (writen < len) {
+ size = write (fd,
+ buffer + writen, len - writen);
+ if (size <= 0)
+ break;
+
+ writen += size;
+ }
+
+ return writen;
+}
+
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr)
+{
+ for (; *s; s++) {
+ if (estr[*s])
+ sprintf(enc, "%c", estr[*s]);
+ else
+ sprintf(enc, "%%%02X", *s);
+ while (*++enc);
+ }
+}
+
+/**
+ * thread safe version of readline with buffering
+ * (taken from Unix Network Programming Volume I, W.R. Stevens)
+ *
+ * This is favoured over fgets() as we'd need to ftruncate()
+ * (see gf_changelog_scan() API) to record new changelog files.
+ * stream open functions does have a truncate like api (although
+ * that can be done via @fflush(fp), @ftruncate(fd) and @fseek(fp),
+ * but this involves mixing POSIX file descriptors and stream FILE *).
+ *
+ * NOTE: This implmentation still does work with more than one fd's
+ * used to perform gf_readline(). For this very reason it's not
+ * made a part of libglusterfs.
+ */
+
+static pthread_key_t rl_key;
+static pthread_once_t rl_once = PTHREAD_ONCE_INIT;
+
+static void
+readline_destructor (void *ptr)
+{
+ GF_FREE (ptr);
+}
+
+static void
+readline_once (void)
+{
+ pthread_key_create (&rl_key, readline_destructor);
+}
+
+static ssize_t
+my_read (read_line_t *tsd, int fd, char *ptr)
+{
+ if (tsd->rl_cnt <= 0) {
+ if ( (tsd->rl_cnt = read (fd, tsd->rl_buf, MAXLINE)) < 0 )
+ return -1;
+ else if (tsd->rl_cnt == 0)
+ return 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+ }
+
+ tsd->rl_cnt--;
+ *ptr = *tsd->rl_bufptr++;
+ return 1;
+}
+
+static int
+gf_readline_init_once (read_line_t **tsd)
+{
+ if (pthread_once (&rl_once, readline_once) != 0)
+ return -1;
+
+ *tsd = pthread_getspecific (rl_key);
+ if (*tsd)
+ goto out;
+
+ *tsd = GF_CALLOC (1, sizeof (**tsd),
+ gf_changelog_mt_libgfchangelog_rl_t);
+ if (!*tsd)
+ return -1;
+
+ if (pthread_setspecific (rl_key, *tsd) != 0)
+ return -1;
+
+ out:
+ return 0;
+}
+
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen)
+{
+ size_t n = 0;
+ size_t rc = 0;
+ char c = ' ';
+ char *ptr = NULL;
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ ptr = vptr;
+ for (n = 1; n < maxlen; n++) {
+ if ( (rc = my_read (tsd, fd, &c)) == 1 ) {
+ *ptr++ = c;
+ if (c == '\n')
+ break;
+ } else if (rc == 0) {
+ *ptr = '\0';
+ return (n - 1);
+ } else
+ return -1;
+ }
+
+ *ptr = '\0';
+ return n;
+
+}
+
+off_t
+gf_lseek (int fd, off_t offset, int whence)
+{
+ off_t off = 0;
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ if ( (off = lseek (fd, offset, whence)) == -1)
+ return -1;
+
+ tsd->rl_cnt = 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+
+ return off;
+}
+
+int
+gf_ftruncate (int fd, off_t length)
+{
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ if (ftruncate (fd, 0))
+ return -1;
+
+ tsd->rl_cnt = 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+
+ return 0;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
new file mode 100644
index 000000000..3aa6ed7b8
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_HELPERS_H
+#define _GF_CHANGELOG_HELPERS_H
+
+#include <unistd.h>
+#include <dirent.h>
+#include <limits.h>
+#include <pthread.h>
+
+#include <xlator.h>
+
+#define GF_CHANGELOG_TRACKER "tracker"
+
+#define GF_CHANGELOG_CURRENT_DIR ".current"
+#define GF_CHANGELOG_PROCESSED_DIR ".processed"
+#define GF_CHANGELOG_PROCESSING_DIR ".processing"
+
+#ifndef MAXLINE
+#define MAXLINE 4096
+#endif
+
+#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) do { \
+ memcpy (ascii + off, ptr, len); \
+ off += len; \
+ } while (0)
+
+typedef struct read_line {
+ int rl_cnt;
+ char *rl_bufptr;
+ char rl_buf[MAXLINE];
+} read_line_t;
+
+typedef struct gf_changelog {
+ xlator_t *this;
+
+ /* 'processing' directory stream */
+ DIR *gfc_dir;
+
+ /* fd to the tracker file */
+ int gfc_fd;
+
+ /* connection retries */
+ int gfc_connretries;
+
+ char gfc_sockpath[PATH_MAX];
+
+ char gfc_brickpath[PATH_MAX];
+
+ /* socket for recieving notifications */
+ int gfc_sockfd;
+
+ char *gfc_working_dir;
+
+ /* RFC 3986 string encoding */
+ char rfc3986[256];
+
+ char gfc_current_dir[PATH_MAX];
+ char gfc_processed_dir[PATH_MAX];
+ char gfc_processing_dir[PATH_MAX];
+
+ pthread_t gfc_changelog_processor;
+} gf_changelog_t;
+
+int
+gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc);
+
+void *
+gf_changelog_process (void *data);
+
+ssize_t
+gf_changelog_read_path (int fd, char *buffer, size_t bufsize);
+
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr);
+
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len);
+
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen);
+
+int
+gf_ftruncate (int fd, off_t length);
+
+off_t
+gf_lseek (int fd, off_t offset, int whence);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-process.c b/xlators/features/changelog/lib/src/gf-changelog-process.c
new file mode 100644
index 000000000..df7204931
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-process.c
@@ -0,0 +1,571 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <pthread.h>
+
+#include "uuid.h"
+#include "globals.h"
+#include "glusterfs.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+
+extern int byebye;
+
+/**
+ * number of gfid records after fop number
+ */
+int nr_gfids[] = {
+ [GF_FOP_MKNOD] = 1,
+ [GF_FOP_MKDIR] = 1,
+ [GF_FOP_UNLINK] = 1,
+ [GF_FOP_RMDIR] = 1,
+ [GF_FOP_SYMLINK] = 1,
+ [GF_FOP_RENAME] = 2,
+ [GF_FOP_LINK] = 1,
+ [GF_FOP_CREATE] = 1,
+};
+
+static char *
+binary_to_ascii (uuid_t uuid)
+{
+ return uuid_utoa (uuid);
+}
+
+static char *
+conv_noop (char *ptr) { return ptr; }
+
+#define VERIFY_SEPARATOR(ptr, plen, perr) \
+ { \
+ if (*(ptr + plen) != '\0') { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+#define MOVER_MOVE(mover, nleft, bytes) \
+ { \
+ mover += bytes; \
+ nleft -= bytes; \
+ } \
+
+#define PARSE_GFID(mov, ptr, le, fn, perr) \
+ { \
+ VERIFY_SEPARATOR (mov, le, perr); \
+ ptr = fn (mov); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+#define FILL_AND_MOVE(pt, buf, of, mo, nl, le) \
+ { \
+ GF_CHANGELOG_FILL_BUFFER (pt, buf, of, strlen (pt)); \
+ MOVER_MOVE (mo, nl, le); \
+ }
+
+
+#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \
+ { \
+ memcpy (uuid, mover, sizeof (uuid_t)); \
+ ptr = binary_to_ascii (uuid); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ MOVER_MOVE (mover, nleft, sizeof (uuid_t)); \
+ } \
+
+#define LINE_BUFSIZE 3*PATH_MAX /* enough buffer for extra chars too */
+
+/**
+ * using mmap() makes parsing easy. fgets() cannot be used here as
+ * the binary gfid could contain a line-feed (0x0A), in that case fgets()
+ * would read an incomplete line and parsing would fail. using POSIX fds
+ * would result is additional code to maintain state in case of partial
+ * reads of data (where multiple entries do not fit extirely in the buffer).
+ *
+ * mmap() gives the flexibility of pointing to an offset in the file
+ * without us worrying about reading it in memory (VM does that for us for
+ * free).
+ */
+
+static int
+gf_changelog_parse_binary (xlator_t *this,
+ gf_changelog_t *gfc, int from_fd, int to_fd,
+ size_t start_offset, struct stat *stbuf)
+
+{
+ int ret = -1;
+ off_t off = 0;
+ off_t nleft = 0;
+ uuid_t uuid = {0,};
+ char *ptr = NULL;
+ char *bname_start = NULL;
+ char *bname_end = NULL;
+ char *mover = NULL;
+ char *start = NULL;
+ char current_mover = ' ';
+ size_t blen = 0;
+ int parse_err = 0;
+ char ascii[LINE_BUFSIZE] = {0,};
+
+ nleft = stbuf->st_size;
+
+ start = (char *) mmap (NULL, nleft,
+ PROT_READ, MAP_PRIVATE, from_fd, 0);
+ if (!start) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mmap() error (reason: %s)", strerror (errno));
+ goto out;
+ }
+
+ mover = start;
+
+ MOVER_MOVE (mover, nleft, start_offset);
+
+ while (nleft > 0) {
+
+ off = blen = 0;
+ ptr = bname_start = bname_end = NULL;
+
+ current_mover = *mover;
+
+ switch (current_mover) {
+ case 'D':
+ case 'M':
+ MOVER_MOVE (mover, nleft, 1);
+ PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+ break;
+
+ case 'E':
+ MOVER_MOVE (mover, nleft, 1);
+ PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+ bname_start = mover;
+ if ( (bname_end = strchr (mover, '\n')) == NULL ) {
+ parse_err = 1;
+ break;
+ }
+
+ blen = bname_end - bname_start;
+ MOVER_MOVE (mover, nleft, blen);
+
+ break;
+
+ default:
+ parse_err = 1;
+ }
+
+ if (parse_err)
+ break;
+
+ GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (ptr, ascii, off, strlen (ptr));
+ if (blen)
+ GF_CHANGELOG_FILL_BUFFER (bname_start,
+ ascii, off, blen);
+ GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+ if (gf_changelog_write (to_fd, ascii, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "processing binary changelog failed due to "
+ " error in writing ascii change (reason: %s)",
+ strerror (errno));
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, 1);
+ }
+
+ if ( (nleft == 0) && (!parse_err))
+ ret = 0;
+
+ if (munmap (start, stbuf->st_size))
+ gf_log (this->name, GF_LOG_ERROR,
+ "munmap() error (reason: %s)", strerror (errno));
+ out:
+ return ret;
+}
+
+/**
+ * ascii decoder:
+ * - separate out one entry from another
+ * - use fop name rather than fop number
+ */
+static int
+gf_changelog_parse_ascii (xlator_t *this,
+ gf_changelog_t *gfc, int from_fd, int to_fd,
+ size_t start_offset, struct stat *stbuf)
+{
+ int ng = 0;
+ int ret = -1;
+ int fop = 0;
+ int len = 0;
+ off_t off = 0;
+ off_t nleft = 0;
+ char *ptr = NULL;
+ char *eptr = NULL;
+ char *start = NULL;
+ char *mover = NULL;
+ int parse_err = 0;
+ char current_mover = ' ';
+ char ascii[LINE_BUFSIZE] = {0,};
+ const char *fopname = NULL;
+
+ nleft = stbuf->st_size;
+
+ start = (char *) mmap (NULL, nleft,
+ PROT_READ, MAP_PRIVATE, from_fd, 0);
+ if (!start) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mmap() error (reason: %s)", strerror (errno));
+ goto out;
+ }
+
+ mover = start;
+
+ MOVER_MOVE (mover, nleft, start_offset);
+
+ while (nleft > 0) {
+ off = 0;
+ current_mover = *mover;
+
+ GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+ switch (current_mover) {
+ case 'D':
+ case 'M':
+ MOVER_MOVE (mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+ conv_noop, parse_err);
+ FILL_AND_MOVE(ptr, ascii, off,
+ mover, nleft, UUID_CANONICAL_FORM_LEN);
+ break;
+
+ case 'E':
+ MOVER_MOVE (mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+ conv_noop, parse_err);
+ FILL_AND_MOVE (ptr, ascii, off,
+ mover, nleft, UUID_CANONICAL_FORM_LEN);
+ FILL_AND_MOVE (" ", ascii, off,
+ mover, nleft, 1);
+
+ /* fop */
+ len = strlen (mover);
+ VERIFY_SEPARATOR (mover, len, parse_err);
+
+ fop = atoi (mover);
+ if ( (fopname = gf_fop_list[fop]) == NULL) {
+ parse_err = 1;
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, len);
+
+ len = strlen (fopname);
+ GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len);
+
+ /* pargfid + bname */
+ ng = nr_gfids[fop];
+ while (ng-- > 0) {
+ MOVER_MOVE (mover, nleft, 1);
+ len = strlen (mover);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+ PARSE_GFID (mover, ptr, len,
+ conv_noop, parse_err);
+ eptr = calloc (3, strlen (ptr));
+ if (!eptr) {
+ parse_err = 1;
+ break;
+ }
+
+ gf_rfc3986_encode ((unsigned char *) ptr,
+ eptr, gfc->rfc3986);
+ FILL_AND_MOVE (eptr, ascii, off,
+ mover, nleft, len);
+ free (eptr);
+ }
+
+ break;
+ default:
+ parse_err = 1;
+ }
+
+ if (parse_err)
+ break;
+
+ GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+ if (gf_changelog_write (to_fd, ascii, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "processing ascii changelog failed due to "
+ " wrror in writing change (reason: %s)",
+ strerror (errno));
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, 1);
+
+ }
+
+ if ( (nleft == 0) && (!parse_err))
+ ret = 0;
+
+ if (munmap (start, stbuf->st_size))
+ gf_log (this->name, GF_LOG_ERROR,
+ "munmap() error (reason: %s)", strerror (errno));
+
+ out:
+ return ret;
+}
+
+#define COPY_BUFSIZE 8192
+static int
+gf_changelog_copy (xlator_t *this, int from_fd, int to_fd)
+{
+ ssize_t size = 0;
+ char buffer[COPY_BUFSIZE+1] = {0,};
+
+ while (1) {
+ size = read (from_fd, buffer, COPY_BUFSIZE);
+ if (size <= 0)
+ break;
+
+ if (gf_changelog_write (to_fd,
+ buffer, size) != size) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error processing ascii changlog");
+ size = -1;
+ break;
+ }
+ }
+
+ return (size < 0 ? -1 : 0);
+}
+
+static int
+gf_changelog_decode (xlator_t *this, gf_changelog_t *gfc, int from_fd,
+ int to_fd, struct stat *stbuf, int *zerob)
+{
+ int ret = -1;
+ int encoding = -1;
+ size_t elen = 0;
+ char buffer[1024] = {0,};
+
+ CHANGELOG_GET_ENCODING (from_fd, buffer, 1024, encoding, elen);
+ if (encoding == -1) /* unknown encoding */
+ goto out;
+
+ if (!CHANGELOG_VALID_ENCODING (encoding))
+ goto out;
+
+ if (elen == stbuf->st_size) {
+ *zerob = 1;
+ goto out;
+ }
+
+ /**
+ * start processing after the header
+ */
+ lseek (from_fd, elen, SEEK_SET);
+
+ switch (encoding) {
+ case CHANGELOG_ENCODE_BINARY:
+ /**
+ * this ideally should have been a part of changelog-encoders.c
+ * (ie. part of the changelog translator).
+ */
+ ret = gf_changelog_parse_binary (this, gfc, from_fd,
+ to_fd, elen, stbuf);
+ break;
+
+ case CHANGELOG_ENCODE_ASCII:
+ ret = gf_changelog_parse_ascii (this, gfc, from_fd,
+ to_fd, elen, stbuf);
+ break;
+ default:
+ ret = gf_changelog_copy (this, from_fd, to_fd);
+ }
+
+ out:
+ return ret;
+}
+
+static int
+gf_changelog_consume (xlator_t *this, gf_changelog_t *gfc, char *from_path)
+{
+ int ret = -1;
+ int fd1 = 0;
+ int fd2 = 0;
+ int zerob = 0;
+ struct stat stbuf = {0,};
+ char dest[PATH_MAX] = {0,};
+ char to_path[PATH_MAX] = {0,};
+
+ ret = stat (from_path, &stbuf);
+ if (ret || !S_ISREG(stbuf.st_mode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "stat failed on changelog file: %s", from_path);
+ goto out;
+ }
+
+ fd1 = open (from_path, O_RDONLY);
+ if (fd1 < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot open changelog file: %s (reason: %s)",
+ from_path, strerror (errno));
+ goto out;
+ }
+
+ (void) snprintf (to_path, PATH_MAX, "%s%s",
+ gfc->gfc_current_dir, basename (from_path));
+ (void) snprintf (dest, PATH_MAX, "%s%s",
+ gfc->gfc_processing_dir, basename (from_path));
+
+ fd2 = open (to_path, O_CREAT | O_TRUNC | O_RDWR,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd2 < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot create ascii changelog file %s (reason %s)",
+ to_path, strerror (errno));
+ goto close_fd;
+ } else {
+ ret = gf_changelog_decode (this, gfc, fd1,
+ fd2, &stbuf, &zerob);
+
+ close (fd2);
+
+ if (!ret) {
+ /* move it to processing on a successfull
+ decode */
+ ret = rename (to_path, dest);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error moving %s to processing dir"
+ " (reason: %s)", to_path,
+ strerror (errno));
+ }
+
+ /* remove it from .current if it's an empty file */
+ if (zerob) {
+ ret = unlink (to_path);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not unlink %s (reason: %s",
+ to_path, strerror (errno));
+ }
+ }
+
+ close_fd:
+ close (fd1);
+
+ out:
+ return ret;
+}
+
+static char *
+gf_changelog_ext_change (xlator_t *this,
+ gf_changelog_t *gfc, char *path, size_t readlen)
+{
+ int alo = 0;
+ int ret = 0;
+ size_t len = 0;
+ char *buf = NULL;
+
+ buf = path;
+ while (len < readlen) {
+ if (*buf == '\0') {
+ alo = 1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "processing changelog: %s", path);
+ ret = gf_changelog_consume (this, gfc, path);
+ }
+
+ if (ret)
+ break;
+
+ len++; buf++;
+ if (alo) {
+ alo = 0;
+ path = buf;
+ }
+ }
+
+ return (ret) ? NULL : path;
+}
+
+void *
+gf_changelog_process (void *data)
+{
+ ssize_t len = 0;
+ ssize_t offlen = 0;
+ xlator_t *this = NULL;
+ char *sbuf = NULL;
+ gf_changelog_t *gfc = NULL;
+ char from_path[PATH_MAX] = {0,};
+
+ gfc = (gf_changelog_t *) data;
+ this = gfc->this;
+
+ pthread_detach (pthread_self());
+
+ for (;;) {
+ len = gf_changelog_read_path (gfc->gfc_sockfd,
+ from_path + offlen,
+ PATH_MAX - offlen);
+ if (len < 0)
+ continue; /* ignore it for now */
+
+ if (len == 0) { /* close() from the changelog translator */
+ gf_log (this->name, GF_LOG_INFO, "close from changelog"
+ " notification translator.");
+
+ if (gfc->gfc_connretries != 1) {
+ if (!gf_changelog_notification_init(this, gfc))
+ continue;
+ }
+
+ byebye = 1;
+ break;
+ }
+
+ len += offlen;
+ sbuf = gf_changelog_ext_change (this, gfc, from_path, len);
+ if (!sbuf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not extract changelog filename");
+ continue;
+ }
+
+ offlen = 0;
+ if (sbuf != (from_path + len)) {
+ offlen = from_path + len - sbuf;
+ memmove (from_path, sbuf, offlen);
+ }
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "byebye (%d) from processing thread...", byebye);
+ return NULL;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
new file mode 100644
index 000000000..ca8e373e7
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog.c
@@ -0,0 +1,515 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <string.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "logging.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+int byebye = 0;
+
+static void
+gf_changelog_cleanup (gf_changelog_t *gfc)
+{
+ /* socket */
+ if (gfc->gfc_sockfd != -1)
+ close (gfc->gfc_sockfd);
+ /* tracker fd */
+ if (gfc->gfc_fd != -1)
+ close (gfc->gfc_fd);
+ /* processing dir */
+ if (gfc->gfc_dir)
+ closedir (gfc->gfc_dir);
+
+ if (gfc->gfc_working_dir)
+ free (gfc->gfc_working_dir); /* allocated by realpath */
+}
+
+void
+__attribute__ ((constructor)) gf_changelog_ctor (void)
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ return;
+
+ if (glusterfs_globals_init (ctx)) {
+ free (ctx);
+ ctx = NULL;
+ return;
+ }
+
+ THIS->ctx = ctx;
+}
+
+void
+__attribute__ ((destructor)) gf_changelog_dtor (void)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this)
+ return;
+
+ ctx = this->ctx;
+ gfc = this->private;
+
+ if (gfc) {
+ gf_changelog_cleanup (gfc);
+ GF_FREE (gfc);
+ }
+
+ if (ctx) {
+ pthread_mutex_destroy (&ctx->lock);
+ free (ctx);
+ ctx = NULL;
+ }
+}
+
+
+static int
+gf_changelog_open_dirs (gf_changelog_t *gfc)
+{
+ int ret = -1;
+ DIR *dir = NULL;
+ int tracker_fd = 0;
+ char tracker_path[PATH_MAX] = {0,};
+
+ (void) snprintf (gfc->gfc_current_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_CURRENT_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_current_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ (void) snprintf (gfc->gfc_processed_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_PROCESSED_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_processed_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ (void) snprintf (gfc->gfc_processing_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_PROCESSING_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_processing_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ dir = opendir (gfc->gfc_processing_dir);
+ if (!dir) {
+ gf_log ("", GF_LOG_ERROR,
+ "opendir() error [reason: %s]", strerror (errno));
+ goto out;
+ }
+
+ gfc->gfc_dir = dir;
+
+ (void) snprintf (tracker_path, PATH_MAX,
+ "%s/"GF_CHANGELOG_TRACKER, gfc->gfc_working_dir);
+
+ tracker_fd = open (tracker_path, O_CREAT | O_APPEND | O_RDWR,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (tracker_fd < 0) {
+ closedir (gfc->gfc_dir);
+ ret = -1;
+ goto out;
+ }
+
+ gfc->gfc_fd = tracker_fd;
+ ret = 0;
+ out:
+ return ret;
+}
+
+int
+gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc)
+{
+ int ret = 0;
+ int len = 0;
+ int tries = 0;
+ int sockfd = 0;
+ struct sockaddr_un remote;
+
+ this = gfc->this;
+
+ if (gfc->gfc_sockfd != -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Reconnecting...");
+ close (gfc->gfc_sockfd);
+ }
+
+ sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
+ if (sockfd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ CHANGELOG_MAKE_SOCKET_PATH (gfc->gfc_brickpath,
+ gfc->gfc_sockpath, PATH_MAX);
+ gf_log (this->name, GF_LOG_INFO,
+ "connecting to changelog socket: %s (brick: %s)",
+ gfc->gfc_sockpath, gfc->gfc_brickpath);
+
+ remote.sun_family = AF_UNIX;
+ strcpy (remote.sun_path, gfc->gfc_sockpath);
+
+ len = strlen (remote.sun_path) + sizeof (remote.sun_family);
+
+ while (tries < gfc->gfc_connretries) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "connection attempt %d/%d...",
+ tries + 1, gfc->gfc_connretries);
+
+ /* initiate a connect */
+ if (connect (sockfd, (struct sockaddr *) &remote, len) == 0) {
+ gfc->gfc_sockfd = sockfd;
+ break;
+ }
+
+ tries++;
+ sleep (2);
+ }
+
+ if (tries == gfc->gfc_connretries) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not connect to changelog socket!"
+ " bailing out...");
+ ret = -1;
+ } else
+ gf_log (this->name, GF_LOG_INFO,
+ "connection successful");
+
+ out:
+ return ret;
+}
+
+int
+gf_changelog_done (char *file)
+{
+ int ret = -1;
+ char *buffer = NULL;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+ char to_path[PATH_MAX] = {0,};
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ if (!file || !strlen (file))
+ goto out;
+
+ /* make sure 'file' is inside ->gfc_working_dir */
+ buffer = realpath (file, NULL);
+ if (!buffer)
+ goto out;
+
+ if (strncmp (gfc->gfc_working_dir,
+ buffer, strlen (gfc->gfc_working_dir)))
+ goto out;
+
+ (void) snprintf (to_path, PATH_MAX, "%s%s",
+ gfc->gfc_processed_dir, basename (buffer));
+ gf_log (this->name, GF_LOG_DEBUG,
+ "moving %s to processed directory", file);
+ ret = rename (buffer, to_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot move %s to %s (reason: %s)",
+ file, to_path, strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ if (buffer)
+ free (buffer); /* allocated by realpath() */
+ return ret;
+}
+
+/**
+ * @API
+ * for a set of changelogs, start from the begining
+ */
+int
+gf_changelog_start_fresh ()
+{
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ errno = EINVAL;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ if (gf_ftruncate (gfc->gfc_fd, 0))
+ goto out;
+
+ return 0;
+
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * return the next changelog file entry. zero means all chanelogs
+ * consumed.
+ */
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen)
+{
+ ssize_t size = 0;
+ int tracker_fd = 0;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+ char buffer[PATH_MAX] = {0,};
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ tracker_fd = gfc->gfc_fd;
+
+ size = gf_readline (tracker_fd, buffer, maxlen);
+ if (size < 0)
+ goto out;
+ if (size == 0)
+ return 0;
+
+ memcpy (bufptr, buffer, size - 1);
+ *(buffer + size) = '\0';
+
+ return size;
+
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_scan() - scan and generate a list of change entries
+ *
+ * calling this api multiple times (without calling gf_changlog_done())
+ * would result new changelogs(s) being refreshed in the tracker file.
+ * This call also acts as a cancellation point for the consumer.
+ */
+ssize_t
+gf_changelog_scan ()
+{
+ int ret = 0;
+ int tracker_fd = 0;
+ size_t len = 0;
+ size_t off = 0;
+ xlator_t *this = NULL;
+ size_t nr_entries = 0;
+ gf_changelog_t *gfc = NULL;
+ struct dirent *entryp = NULL;
+ struct dirent *result = NULL;
+ char buffer[PATH_MAX] = {0,};
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ /**
+ * do we need to protect 'byebye' with locks? worst, the
+ * consumer would get notified during next scan().
+ */
+ if (byebye) {
+ errno = ECONNREFUSED;
+ goto out;
+ }
+
+ errno = EINVAL;
+
+ tracker_fd = gfc->gfc_fd;
+
+ if (gf_ftruncate (tracker_fd, 0))
+ goto out;
+
+ len = offsetof(struct dirent, d_name)
+ + pathconf(gfc->gfc_processing_dir, _PC_NAME_MAX) + 1;
+ entryp = GF_CALLOC (1, len,
+ gf_changelog_mt_libgfchangelog_dirent_t);
+ if (!entryp)
+ goto out;
+
+ rewinddir (gfc->gfc_dir);
+ while (1) {
+ ret = readdir_r (gfc->gfc_dir, entryp, &result);
+ if (ret || !result)
+ break;
+
+ if ( !strcmp (basename (entryp->d_name), ".")
+ || !strcmp (basename (entryp->d_name), "..") )
+ continue;
+
+ nr_entries++;
+
+ GF_CHANGELOG_FILL_BUFFER (gfc->gfc_processing_dir,
+ buffer, off,
+ strlen (gfc->gfc_processing_dir));
+ GF_CHANGELOG_FILL_BUFFER (entryp->d_name, buffer,
+ off, strlen (entryp->d_name));
+ GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1);
+
+ if (gf_changelog_write (tracker_fd, buffer, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error writing changelog filename"
+ " to tracker file");
+ break;
+ }
+ off = 0;
+ }
+
+ GF_FREE (entryp);
+
+ if (!result) {
+ if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1)
+ return nr_entries;
+ }
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_register() - register a client for updates.
+ */
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+ char *log_file, int log_level, int max_reconnects)
+{
+ int i = 0;
+ int ret = -1;
+ int errn = 0;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this->ctx)
+ goto out;
+
+ errno = ENOMEM;
+
+ gfc = GF_CALLOC (1, sizeof (*gfc),
+ gf_changelog_mt_libgfchangelog_t);
+ if (!gfc)
+ goto out;
+
+ gfc->this = this;
+
+ gfc->gfc_dir = NULL;
+ gfc->gfc_fd = gfc->gfc_sockfd = -1;
+
+ gfc->gfc_working_dir = realpath (scratch_dir, NULL);
+ if (!gfc->gfc_working_dir) {
+ errn = errno;
+ goto cleanup;
+ }
+
+ ret = gf_changelog_open_dirs (gfc);
+ if (ret) {
+ errn = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create entries in scratch dir");
+ goto cleanup;
+ }
+
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init (this->ctx, log_file, NULL))
+ goto cleanup;
+
+ gf_log_set_loglevel ((log_level == -1) ? GF_LOG_INFO :
+ log_level);
+
+ gfc->gfc_connretries = (max_reconnects <= 0) ? 1 : max_reconnects;
+ (void) strncpy (gfc->gfc_brickpath, brick_path, PATH_MAX);
+
+ ret = gf_changelog_notification_init (this, gfc);
+ if (ret) {
+ errn = errno;
+ goto cleanup;
+ }
+
+ ret = gf_thread_create (&gfc->gfc_changelog_processor,
+ NULL, gf_changelog_process, gfc);
+ if (ret) {
+ errn = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "error creating changelog processor thread"
+ " new changes won't be recorded!!!");
+ goto cleanup;
+ }
+
+ for (; i < 256; i++) {
+ gfc->rfc3986[i] =
+ (isalnum(i) || i == '~' ||
+ i == '-' || i == '.' || i == '_') ? i : 0;
+ }
+
+ ret = 0;
+ this->private = gfc;
+
+ goto out;
+
+ cleanup:
+ gf_changelog_cleanup (gfc);
+ GF_FREE (gfc);
+ this->private = NULL;
+ errno = errn;
+
+ out:
+ return ret;
+}
diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am
new file mode 100644
index 000000000..e85031ad4
--- /dev/null
+++ b/xlators/features/changelog/src/Makefile.am
@@ -0,0 +1,19 @@
+xlator_LTLIBRARIES = changelog.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \
+ changelog-misc.h changelog-encoders.h changelog-notifier.h
+
+changelog_la_LDFLAGS = -module -avoidversion
+
+changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \
+ changelog-encoders.c changelog-notifier.c
+changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 \
+ -D_GNU_SOURCE -D$(GF_HOST_OS) -shared -nostartfiles -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c
new file mode 100644
index 000000000..553eec85c
--- /dev/null
+++ b/