summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: ShyamsundarR <srangana@redhat.com> 2018-09-13 13:12:47 -0400
committer: ShyamsundarR <srangana@redhat.com> 2018-09-13 14:18:57 -0400
commit: 822dd79db99e0aa221ed90fdf8bb91fbcdfcd66f (patch)
tree: 505fcd3e1c65be76ff69bb528588129efd1b348f
parent: 41388f15fc0ff4891b292efd509c8887c06143d8 (diff)
core: move logs which are only developer relevant to DEBUG level
For each release, we move to DEBUG level certain logs that exist on master as indicators to contributors, prompting them to correct or adapt to core infrastructure changes in libglusterfs and other places. This commit makes that change.

Change-Id: I4157a7ec7d5ec9c2948b2bbc1e4cb8317f28d6b8
Updates: bz#1628620
Signed-off-by: ShyamsundarR <srangana@redhat.com>
-rw-r--r--libglusterfs/src/dict.c6
-rw-r--r--libglusterfs/src/graph.c2
-rw-r--r--libglusterfs/src/options.c2
-rw-r--r--libglusterfs/src/xlator.c2
-rw-r--r--rpc/xdr/src/glusterfs3.h2
5 files changed, 7 insertions, 7 deletions
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index 2e69da98fdc..d75ce11c016 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -38,14 +38,14 @@ struct dict_cmp {
#define VALIDATE_DATA_AND_LOG(data, type, key, ret_val) \
do { \
if (!data || !data->data) { \
- gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, \
- LG_MSG_INVALID_ARG, "data is NULL"); \
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, \
+ "data is NULL"); \
return ret_val; \
} \
/* Not of the asked type, or old version */ \
if ((data->data_type != type) && \
(data->data_type != GF_DATA_TYPE_STR_OLD)) { \
- gf_msg_callingfn("dict", GF_LOG_INFO, EINVAL, LG_MSG_INVALID_ARG, \
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, \
"key %s, %s type asked, has %s type", key, \
data_type_name[type], \
data_type_name[data->data_type]); \
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index 2a213d2c48e..a105c0cd72d 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -393,7 +393,7 @@ _log_if_unknown_option(dict_t *dict, char *key, data_t *value, void *data)
found = xlator_volume_option_get(xl, key);
if (!found) {
- gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_XLATOR_OPTION_INVALID,
+ gf_msg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_XLATOR_OPTION_INVALID,
"option '%s' is not recognized", key);
}
diff --git a/libglusterfs/src/options.c b/libglusterfs/src/options.c
index 30607d2e390..a96828447fc 100644
--- a/libglusterfs/src/options.c
+++ b/libglusterfs/src/options.c
@@ -969,7 +969,7 @@ xl_opt_validate(dict_t *dict, char *key, data_t *value, void *data)
stub->errstr = errstr;
if (fnmatch(opt->key[0], key, FNM_NOESCAPE) != 0) {
- gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
+ gf_msg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_INVALID_ENTRY,
"option '%s' is deprecated, preferred is '%s', "
"continuing with correction",
key, opt->key[0]);
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 6975fbbd15f..0f21ea46ed1 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -335,7 +335,7 @@ xlator_dynload_newway(xlator_t *xl)
xlapi = dlsym(handle, "xlator_api");
if (!xlapi) {
- gf_msg("xlator", GF_LOG_INFO, 0, LG_MSG_DLSYM_ERROR,
+ gf_msg("xlator", GF_LOG_DEBUG, 0, LG_MSG_DLSYM_ERROR,
"dlsym(xlator_api) on %s. "
"Fall back to old symbols",
dlerror());
diff --git a/rpc/xdr/src/glusterfs3.h b/rpc/xdr/src/glusterfs3.h
index b8ef394abaf..6ab80a82922 100644
--- a/rpc/xdr/src/glusterfs3.h
+++ b/rpc/xdr/src/glusterfs3.h
@@ -741,7 +741,7 @@ dict_to_xdr(dict_t *this, gfx_dict *dict)
xpair->value.gfx_value_u.other.other_len = dpair->value->len;
/* Change this to INFO, after taking the above down */
- gf_msg("dict", GF_LOG_INFO, EINVAL, LG_MSG_DICT_SERIAL_FAILED,
+ gf_msg("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_DICT_SERIAL_FAILED,
"key '%s' would not be sent on wire in the future",
dpair->key);
break;
al-entry.c1340
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c595
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h33
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1787
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h65
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c1371
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h50
-rw-r--r--xlators/cluster/afr/src/afr.c1014
-rw-r--r--xlators/cluster/afr/src/afr.h706
-rw-r--r--xlators/cluster/afr/src/pump.c737
-rw-r--r--xlators/cluster/afr/src/pump.h35
-rw-r--r--xlators/cluster/dht/src/Makefile.am20
-rw-r--r--xlators/cluster/dht/src/dht-common.c3506
-rw-r--r--xlators/cluster/dht/src/dht-common.h675
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c520
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c96
-rw-r--r--xlators/cluster/dht/src/dht-helper.c765
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c1139
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1013
-rw-r--r--xlators/cluster/dht/src/dht-layout.c244
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c189
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h23
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1667
-rw-r--r--xlators/cluster/dht/src/dht-rename.c209
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c581
-rw-r--r--xlators/cluster/dht/src/dht-shared.c758
-rw-r--r--xlators/cluster/dht/src/dht.c611
-rw-r--r--xlators/cluster/dht/src/nufa.c434
-rw-r--r--xlators/cluster/dht/src/switch.c347
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c24
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h21
-rw-r--r--xlators/cluster/ha/src/ha.c32
-rw-r--r--xlators/cluster/ha/src/ha.h24
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c24
-rw-r--r--xlators/cluster/map/src/map-mem-types.h21
-rw-r--r--xlators/cluster/map/src/map.c27
-rw-r--r--xlators/cluster/map/src/map.h22
-rw-r--r--xlators/cluster/stripe/src/Makefile.am11
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c675
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h28
-rw-r--r--xlators/cluster/stripe/src/stripe.c3660
-rw-r--r--xlators/cluster/stripe/src/stripe.h165
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/error-gen/src/Makefile.am7
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c996
-rw-r--r--xlators/debug/error-gen/src/error-gen.h37
-rw-r--r--xlators/debug/io-stats/src/Makefile.am7
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h21
-rw-r--r--xlators/debug/io-stats/src/io-stats.c1098
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c3079
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h43
-rw-r--r--xlators/encryption/crypt/src/crypt.c4498
-rw-r--r--xlators/encryption/crypt/src/crypt.h899
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c69
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am3
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from xlators/protocol/legacy/lib/Makefile.am)2
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c571
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c515
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c176
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h46
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c693
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h395
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1477
-rw-r--r--xlators/features/compress/Makefile.am (renamed from xlators/protocol/legacy/server/Makefile.am)2
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h22
-rw-r--r--xlators/features/compress/src/cdc.c342
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am7
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c24
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1172
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h128
-rw-r--r--xlators/features/glupy/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)2
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/src/Makefile.am20
-rw-r--r--xlators/features/glupy/src/debug-trace.py774
-rw-r--r--xlators/features/glupy/src/glupy.c2470
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/gluster.py841
-rw-r--r--xlators/features/glupy/src/helloworld.py19
-rw-r--r--xlators/features/glupy/src/negative.py92
-rw-r--r--xlators/features/index/Makefile.am3
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c1489
-rw-r--r--xlators/features/index/src/index.h73
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c424
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c348
-rw-r--r--xlators/features/locks/src/common.h98
-rw-r--r--xlators/features/locks/src/entrylk.c237
-rw-r--r--xlators/features/locks/src/inodelk.c441
-rw-r--r--xlators/features/locks/src/locks-mem-types.h21
-rw-r--r--xlators/features/locks/src/locks.h84
-rw-r--r--xlators/features/locks/src/posix.c1392
-rw-r--r--xlators/features/locks/src/reservelk.c51
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/src/Makefile.am7
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c52
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am8
-rw-r--r--xlators/features/marker/src/marker-common.c37
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h24
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c134
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h49
-rw-r--r--xlators/features/marker/src/marker-quota.c986
-rw-r--r--xlators/features/marker/src/marker-quota.h73
-rw-r--r--xlators/features/marker/src/marker.c955
-rw-r--r--xlators/features/marker/src/marker.h65
-rw-r--r--xlators/features/marker/utils/Makefile.am7
-rwxr-xr-xxlators/features/marker/utils/gsyncd.in55
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am5
-rw-r--r--xlators/features/marker/utils/syncdaemon/README.md81
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py185
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py16
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py305
-rw-r--r--xlators/features/marker/utils/syncdaemon/libcxattr.py62
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py392
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py80
-rw-r--r--xlators/features/marker/utils/syncdaemon/repce.py162
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py476
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py188
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h20
-rw-r--r--xlators/features/path-convertor/src/path.c29
-rw-r--r--xlators/features/protect/Makefile.am (renamed from xlators/protocol/legacy/client/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c215
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c397
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/src/Makefile.am7
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c720
-rw-r--r--xlators/features/quiesce/src/quiesce.h23
-rw-r--r--xlators/features/quota/src/Makefile.am7
-rw-r--r--xlators/features/quota/src/quota-mem-types.h23
-rw-r--r--xlators/features/quota/src/quota.c1461
-rw-r--r--xlators/features/quota/src/quota.h54
-rw-r--r--xlators/features/read-only/src/Makefile.am9
-rw-r--r--xlators/features/read-only/src/read-only-common.c133
-rw-r--r--xlators/features/read-only/src/read-only-common.h76
-rw-r--r--xlators/features/read-only/src/read-only.c22
-rw-r--r--xlators/features/read-only/src/worm.c36
-rw-r--r--xlators/features/trash/src/Makefile.am7
-rw-r--r--xlators/features/trash/src/trash-mem-types.h23
-rw-r--r--xlators/features/trash/src/trash.c111
-rw-r--r--xlators/features/trash/src/trash.h22
-rw-r--r--xlators/lib/src/libxlator.c479
-rw-r--r--xlators/lib/src/libxlator.h104
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h20
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c22
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am57
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1953
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c4236
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c4243
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c1147
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c531
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h89
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c637
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c271
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c924
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c1893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c693
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c9906
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h172
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c164
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c839
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1040
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c2024
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1729
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c212
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h61
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c5590
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c2986
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h160
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1639
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h71
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c7083
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h439
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c2454
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h145
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c2225
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1170
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h724
-rw-r--r--xlators/mount/fuse/src/Makefile.am22
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c3342
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h331
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c455
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h23
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c930
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in423
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in3
-rw-r--r--xlators/nfs/server/src/Makefile.am23
-rw-r--r--xlators/nfs/server/src/acl3.c708
-rw-r--r--xlators/nfs/server/src/acl3.h31
-rw-r--r--xlators/nfs/server/src/mount3.c1165
-rw-r--r--xlators/nfs/server/src/mount3.h41
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c230
-rw-r--r--xlators/nfs/server/src/nfs-common.h27
-rw-r--r--xlators/nfs/server/src/nfs-fops.c494
-rw-r--r--xlators/nfs/server/src/nfs-fops.h45
-rw-r--r--xlators/nfs/server/src/nfs-generics.c43
-rw-r--r--xlators/nfs/server/src/nfs-generics.h32
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c73
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h19
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h30
-rw-r--r--xlators/nfs/server/src/nfs.c996
-rw-r--r--xlators/nfs/server/src/nfs.h52
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c183
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h55
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c2726
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h39
-rw-r--r--xlators/nfs/server/src/nfs3.c1253
-rw-r--r--xlators/nfs/server/src/nfs3.h129
-rw-r--r--xlators/nfs/server/src/nlm4.c2525
-rw-r--r--xlators/nfs/server/src/nlm4.h77
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c117
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am8
-rw-r--r--xlators/performance/io-cache/src/io-cache.c894
-rw-r--r--xlators/performance/io-cache/src/io-cache.h34
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c22
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h20
-rw-r--r--xlators/performance/io-cache/src/page.c74
-rw-r--r--xlators/performance/io-threads/src/Makefile.am7
-rw-r--r--xlators/performance/io-threads/src/io-threads.c1496
-rw-r--r--xlators/performance/io-threads/src/io-threads.h36
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h21
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am25
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2303
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1001
-rw-r--r--xlators/performance/quick-read/src/Makefile.am7
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h22
-rw-r--r--xlators/performance/quick-read/src/quick-read.c3751
-rw-r--r--xlators/performance/quick-read/src/quick-read.h61
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/read-ahead/src/page.c66
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h20
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c537
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h23
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h36
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c4308
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h106
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c56
-rw-r--r--xlators/performance/write-behind/src/Makefile.am7
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h23
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3845
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am11
-rw-r--r--xlators/protocol/auth/addr/src/addr.c36
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am7
-rw-r--r--xlators/protocol/auth/login/src/login.c22
-rw-r--r--xlators/protocol/client/src/Makefile.am13
-rw-r--r--xlators/protocol/client/src/client-callback.c25
-rw-r--r--xlators/protocol/client/src/client-handshake.c1045
-rw-r--r--xlators/protocol/client/src/client-helpers.c162
-rw-r--r--xlators/protocol/client/src/client-lk.c424
-rw-r--r--xlators/protocol/client/src/client-mem-types.h23
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6203
-rw-r--r--xlators/protocol/client/src/client.c1058
-rw-r--r--xlators/protocol/client/src/client.h145
-rw-r--r--xlators/protocol/client/src/client3_1-fops.c5545
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am27
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am20
-rw-r--r--xlators/protocol/server/src/authenticate.c108
-rw-r--r--xlators/protocol/server/src/authenticate.h19
-rw-r--r--xlators/protocol/server/src/server-handshake.c230
-rw-r--r--xlators/protocol/server/src/server-helpers.c1378
-rw-r--r--xlators/protocol/server/src/server-helpers.h68
-rw-r--r--xlators/protocol/server/src/server-mem-types.h20
-rw-r--r--xlators/protocol/server/src/server-resolve.c448
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6179
-rw-r--r--xlators/protocol/server/src/server.c887
-rw-r--r--xlators/protocol/server/src/server.h143
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c5221
-rw-r--r--xlators/storage/Makefile.am6
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c527
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c783
-rw-r--r--xlators/storage/bd/src/bd.c2404
-rw-r--r--xlators/storage/bd/src/bd.h178
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c744
-rw-r--r--xlators/storage/posix/src/posix-handle.h143
-rw-r--r--xlators/storage/posix/src/posix-helpers.c933
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c2873
-rw-r--r--xlators/storage/posix/src/posix.h131
-rw-r--r--xlators/system/posix-acl/src/Makefile.am10
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h44
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c584
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h70
452 files changed, 153140 insertions, 100156 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index b1643d26c..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
CLEANFILES =
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
index c0b9141c6..90370d861 100644
--- a/xlators/bindings/python/src/Makefile.am
+++ b/xlators/bindings/python/src/Makefile.am
@@ -9,7 +9,7 @@ pythondir = $(xlatordir)/python
python_so_SOURCES = python.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+AM_CFLAGS = -fPIC $(GF_CPPFLAGS) -Wall \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
$(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
index ee0eb1310..337c983ec 100644
--- a/xlators/bindings/python/src/gluster.py
+++ b/xlators/bindings/python/src/gluster.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
from glusterstack import *
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
index ba24c8165..0c071ae98 100644
--- a/xlators/bindings/python/src/glusterstack.py
+++ b/xlators/bindings/python/src/glusterstack.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
index e9069d07c..98437d22e 100644
--- a/xlators/bindings/python/src/glustertypes.py
+++ b/xlators/bindings/python/src/glustertypes.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
import collections
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
index 3310a2115..9b96790de 100644
--- a/xlators/bindings/python/src/python.c
+++ b/xlators/bindings/python/src/python.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <Python.h>
#ifndef _CONFIG_H
@@ -45,7 +35,7 @@ python_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
+ int32_t count,
off_t offset)
{
python_private_t *priv = (python_private_t *)this->private;
@@ -148,7 +138,7 @@ init (xlator_t *this)
Py_InitializeEx(0);
if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
+ gf_log ("python", GF_LOG_ERROR,
"FATAL: python should have exactly one child");
return -1;
}
@@ -166,7 +156,7 @@ init (xlator_t *this)
}
priv->pInterp = Py_NewInterpreter();
-
+
// Adjust python's path
PyObject *syspath = PySys_GetObject("path");
PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
@@ -188,7 +178,7 @@ init (xlator_t *this)
priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
+
priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
if (!priv->pScriptModule || PyErr_Occurred())
{
@@ -217,7 +207,7 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
python_private_t *priv = (python_private_t*)(this->private);
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
index 507455c85..59a991dca 100644
--- a/xlators/bindings/python/src/testxlator.py
+++ b/xlators/bindings/python/src/testxlator.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+"""
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
"""
This is a test translator written in python.
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index e192b599b..35d18a6c0 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,21 +1,31 @@
xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c $(top_builddir)/xlators/lib/src/libxlator.c
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
+ afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
+ afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
+afr_la_LDFLAGS = -module -avoid-version
afr_la_SOURCES = $(afr_common_source) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-pump_la_LDFLAGS = -module -avoidversion
+pump_la_LDFLAGS = -module -avoid-version
pump_la_SOURCES = $(afr_common_source) pump.c
pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
+ afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
+ afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
+ $(top_builddir)/glusterfsd/src/glusterfsd.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index ca470716a..af01f2ef2 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -44,6 +35,7 @@
#include "compat.h"
#include "byte-order.h"
#include "statedump.h"
+#include "inode.h"
#include "fd.h"
@@ -54,12 +46,12 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+#include "afr-self-heald.h"
#include "pump.h"
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL
#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-
+#define AFR_STATISTICS_HISTORY_SIZE 50
int
afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
gf_boolean_t fail_conflict);
@@ -90,6 +82,75 @@ afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
path, priv->pending_key[i]);
/* 3 = data+metadata+entry */
}
+ ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
+ "lookup", path);
+ }
+}
+
+int
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc, void **gfid_req)
+{
+ int ret = -ENOMEM;
+
+ GF_ASSERT (gfid_req);
+
+ *gfid_req = NULL;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req)
+ goto out;
+ if (xattr_req)
+ dict_copy (xattr_req, local->xattr_req);
+
+ afr_xattr_req_prepare (this, local->xattr_req, loc->path);
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to get the gfid from dict", loc->path);
+ *gfid_req = NULL;
+ } else {
+ if (loc->parent != NULL)
+ dict_del (local->xattr_req, "gfid-req");
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
+{
+ inode_t *inode = NULL;
+
+ inode = loc->inode;
+ if (inode && !uuid_is_null (inode->gfid))
+ uuid_copy (dst, inode->gfid);
+ else if (!uuid_is_null (loc->gfid))
+ uuid_copy (dst, loc->gfid);
+ else if (new && !uuid_is_null (new))
+ uuid_copy (dst, new);
}
int
@@ -132,7 +193,7 @@ afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
if (ret)
- gf_log (THIS->name, GF_LOG_DEBUG, "gfid set failed");
+ gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
out:
if (ret && pgfid)
@@ -141,65 +202,91 @@ out:
return ret;
}
-afr_inode_ctx_t*
-afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)
+void
+afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)
{
- int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
- size_t size = 0;
+ if (!ctx)
+ return;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+}
- GF_ASSERT (child_count > 0);
+afr_inode_ctx_t*
+__afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
- if (!addr) {
- ctx = GF_CALLOC (1, sizeof (*ctx),
- gf_afr_mt_inode_ctx_t);
- if (!ctx)
- goto out;
- size = sizeof (*ctx->fresh_children);
- ctx->fresh_children = GF_CALLOC (child_count, size,
- gf_afr_mt_int32_t);
- if (!ctx->fresh_children)
- goto out;
- } else {
- ctx = (afr_inode_ctx_t*) (long) addr;
+ priv = this->private;
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (afr_inode_ctx_t*) (long) ctx_addr;
+ goto out;
}
- ret = 0;
+ ctx = GF_CALLOC (1, sizeof (*ctx),
+ gf_afr_mt_inode_ctx_t);
+ if (!ctx)
+ goto fail;
+ ctx->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*ctx->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!ctx->fresh_children)
+ goto fail;
+ ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
+ if (ret) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa (inode->gfid));
+ goto fail;
+ }
+
out:
- if (ret && ctx) {
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
- ctx = NULL;
+ return ctx;
+
+fail:
+ afr_inode_ctx_destroy (ctx);
+ return NULL;
+}
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
}
+ UNLOCK (&inode->lock);
return ctx;
}
void
-afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+afr_inode_get_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
{
GF_ASSERT (inode);
GF_ASSERT (params);
- int ret = 0;
afr_inode_ctx_t *ctx = NULL;
afr_private_t *priv = NULL;
int i = 0;
- uint64_t ctx_addr = 0;
int32_t read_child = -1;
int32_t *fresh_children = NULL;
priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- goto unlock;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
+ ctx = __afr_inode_ctx_get (inode, this);
if (!ctx)
goto unlock;
- switch (params->mask_type) {
- case AFR_ICTX_READ_CHILD_MASK:
- fresh_children = params->u.read_ctx.fresh_children;
+ switch (params->op) {
+ case AFR_INODE_GET_READ_CTX:
+ fresh_children = params->u.read_ctx.children;
read_child = (int32_t)(ctx->masks &
AFR_ICTX_READ_CHILD_MASK);
params->u.read_ctx.read_child = read_child;
@@ -208,12 +295,13 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
for (i = 0; i < priv->child_count; i++)
fresh_children[i] = ctx->fresh_children[i];
break;
- case AFR_ICTX_OPENDIR_DONE_MASK:
- params->u.value = ctx->masks &
- AFR_ICTX_OPENDIR_DONE_MASK;
+ case AFR_INODE_GET_OPENDIR_DONE:
+ params->u.value = _gf_false;
+ if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
+ params->u.value = _gf_true;
break;
- case AFR_ICTX_SPLIT_BRAIN_MASK:
- params->u.value = ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK;
+ default:
+ GF_ASSERT (0);
break;
}
}
@@ -221,14 +309,19 @@ unlock:
UNLOCK (&inode->lock);
}
-uint64_t
+gf_boolean_t
afr_is_split_brain (xlator_t *this, inode_t *inode)
{
- afr_inode_params_t params = {0};
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t spb = _gf_false;
- params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+ ctx = afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto out;
+ if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB))
+ spb = _gf_true;
+out:
+ return spb;
}
gf_boolean_t
@@ -236,37 +329,40 @@ afr_is_opendir_done (xlator_t *this, inode_t *inode)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK;
- afr_inode_get_ctx (this, inode, &params);
+ params.op = AFR_INODE_GET_OPENDIR_DONE;
+ afr_inode_get_ctx_params (this, inode, &params);
return params.u.value;
}
-
int32_t
afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_READ_CHILD_MASK;
- params.u.read_ctx.fresh_children = fresh_children;
- afr_inode_get_ctx (this, inode, &params);
+ params.op = AFR_INODE_GET_READ_CTX;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_get_ctx_params (this, inode, &params);
return params.u.read_ctx.read_child;
}
void
-afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *fresh_children, int32_t child_count)
+afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
{
- uint64_t rest_of_mask = 0;
+ uint64_t remaining_mask = 0;
uint64_t mask = 0;
- int i = 0;
- rest_of_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
+ remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
- ctx->masks = rest_of_mask | mask;
+ ctx->masks = remaining_mask | mask;
+}
- /* avoid memcpy as int, int32_t are used interchangeably
- */
+void
+afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *fresh_children, int32_t child_count)
+{
+ int i = 0;
+
+ afr_inode_ctx_set_read_child (ctx, read_child);
for (i = 0; i < child_count; i++) {
if (fresh_children)
ctx->fresh_children[i] = fresh_children[i];
@@ -276,89 +372,92 @@ afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
}
void
-afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
+afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t *stale_children,
+ int32_t child_count)
{
- uint64_t rest_of_mask = 0;
- uint64_t mask = 0;
+ int i = 0;
+ int32_t read_child = -1;
- rest_of_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
- ctx->masks = rest_of_mask | mask;
+ GF_ASSERT (stale_children);
+ for (i = 0; i < child_count; i++) {
+ if (stale_children[i] == -1)
+ break;
+ afr_children_rm_child (ctx->fresh_children,
+ stale_children[i], child_count);
+ }
+ read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK);
+ if (!afr_is_child_present (ctx->fresh_children, child_count,
+ read_child))
+ afr_inode_ctx_set_read_child (ctx, ctx->fresh_children[0]);
}
void
-afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
+afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
{
- uint64_t rest_of_mask = 0;
+ uint64_t remaining_mask = 0;
uint64_t mask = 0;
- if (set) {
- rest_of_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = rest_of_mask | mask;
- } else {
- ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- }
+ remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
+ mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+ ctx->masks = remaining_mask | mask;
}
void
-afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+afr_inode_set_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
{
GF_ASSERT (inode);
GF_ASSERT (params);
- int ret = 0;
afr_inode_ctx_t *ctx = NULL;
afr_private_t *priv = NULL;
- uint64_t ctx_addr = 0;
- gf_boolean_t set = _gf_false;
int32_t read_child = -1;
int32_t *fresh_children = NULL;
+ int32_t *stale_children = NULL;
priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- ctx_addr = 0;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
+ ctx = __afr_inode_ctx_get (inode, this);
if (!ctx)
goto unlock;
- switch (params->mask_type) {
- case AFR_ICTX_READ_CHILD_MASK:
+ switch (params->op) {
+ case AFR_INODE_SET_READ_CTX:
read_child = params->u.read_ctx.read_child;
- fresh_children = params->u.read_ctx.fresh_children;
+ fresh_children = params->u.read_ctx.children;
afr_inode_ctx_set_read_ctx (ctx, read_child,
fresh_children,
priv->child_count);
break;
- case AFR_ICTX_OPENDIR_DONE_MASK:
+ case AFR_INODE_RM_STALE_CHILDREN:
+ stale_children = params->u.read_ctx.children;
+ afr_inode_ctx_rm_stale_children (ctx,
+ stale_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_SET_OPENDIR_DONE:
afr_inode_ctx_set_opendir_done (ctx);
break;
- case AFR_ICTX_SPLIT_BRAIN_MASK:
- set = params->u.value;
- afr_inode_ctx_set_splitbrain (ctx, set);
+ default:
+ GF_ASSERT (0);
break;
}
- ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
- "set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
}
unlock:
UNLOCK (&inode->lock);
}
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb)
{
- afr_inode_params_t params = {0};
+ afr_inode_ctx_t *ctx = NULL;
- params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK;
- params.u.value = set;
- afr_inode_set_ctx (this, inode, &params);
+ ctx = afr_inode_ctx_get (inode, this);
+ if (mdata_spb != DONT_KNOW)
+ ctx->mdata_spb = mdata_spb;
+ if (data_spb != DONT_KNOW)
+ ctx->data_spb = data_spb;
}
void
@@ -366,8 +465,8 @@ afr_set_opendir_done (xlator_t *this, inode_t *inode)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK;
- afr_inode_set_ctx (this, inode, &params);
+ params.op = AFR_INODE_SET_OPENDIR_DONE;
+ afr_inode_set_ctx_params (this, inode, &params);
}
void
@@ -375,14 +474,31 @@ afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
int32_t *fresh_children)
{
afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+ priv = this->private;
GF_ASSERT (read_child >= 0);
GF_ASSERT (fresh_children);
+ GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
+ read_child));
- params.mask_type = AFR_ICTX_READ_CHILD_MASK;
+ params.op = AFR_INODE_SET_READ_CTX;
params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.fresh_children = fresh_children;
- afr_inode_set_ctx (this, inode, &params);
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
+ int32_t *stale_children)
+{
+ afr_inode_params_t params = {0};
+
+ GF_ASSERT (stale_children);
+
+ params.op = AFR_INODE_RM_STALE_CHILDREN;
+ params.u.read_ctx.children = stale_children;
+ afr_inode_set_ctx_params (this, inode, &params);
}
gf_boolean_t
@@ -426,6 +542,10 @@ afr_is_read_child (int32_t *success_children, int32_t *sources,
gf_boolean_t success_child = _gf_false;
gf_boolean_t source = _gf_false;
+ if (child < 0) {
+ return _gf_false;
+ }
+
GF_ASSERT (success_children);
GF_ASSERT (child_count > 0);
@@ -442,29 +562,69 @@ out:
return (success_child && source);
}
+int32_t
+afr_hash_child (int32_t *success_children, int32_t child_count,
+ unsigned int hmode, uuid_t gfid)
+{
+ uuid_t gfid_copy = {0,};
+ pid_t pid;
+
+ if (!hmode) {
+ return -1;
+ }
+
+ if (gfid) {
+ uuid_copy(gfid_copy,gfid);
+ }
+ if (hmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients will converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
+ }
+
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
+
/* If sources is NULL the xattrs are assumed to be of source for all
* success_children.
*/
int
-afr_select_read_child_from_policy (int32_t *success_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources)
+afr_select_read_child_from_policy (int32_t *success_children,
+ int32_t child_count, int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid)
{
int32_t read_child = -1;
int i = 0;
GF_ASSERT (success_children);
- read_child = prev_read_child;
+ read_child = config_read_child;
if (afr_is_read_child (success_children, sources, child_count,
read_child))
goto out;
- read_child = config_read_child;
+ read_child = prev_read_child;
if (afr_is_read_child (success_children, sources, child_count,
read_child))
goto out;
+ read_child = afr_hash_child (success_children, child_count,
+ hmode, gfid);
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child)) {
+ goto out;
+ }
+
for (i = 0; i < child_count; i++) {
read_child = success_children[i];
if (read_child < 0)
@@ -484,7 +644,7 @@ out:
void
afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child)
+ int32_t config_read_child, uuid_t gfid)
{
int read_child = -1;
afr_private_t *priv = NULL;
@@ -494,7 +654,8 @@ afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
priv->child_count,
prev_read_child,
config_read_child,
- NULL);
+ NULL,
+ priv->hash_mode, gfid);
if (read_child >= 0)
afr_inode_set_read_ctx (this, inode, read_child,
fresh_children);
@@ -550,8 +711,11 @@ afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
GF_ASSERT (call_child);
GF_ASSERT (last_index);
GF_ASSERT (fresh_children);
- GF_ASSERT (read_child >= 0);
+ if (read_child < 0) {
+ ret = -EIO;
+ goto out;
+ }
priv = this->private;
*call_child = -1;
*last_index = -1;
@@ -600,113 +764,103 @@ out:
}
void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
+{
+ afr_reset_xattr (xattr, child_count);
+ GF_FREE (xattr);
+}
+
+void
afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
{
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int i = 0;
-
sh = &local->self_heal;
priv = this->private;
- if (sh->buf)
- GF_FREE (sh->buf);
+ if (sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+ GF_FREE (sh->data_sh_info);
+
+ if (sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+ GF_FREE (sh->metadata_sh_info);
- if (sh->parentbufs)
- GF_FREE (sh->parentbufs);
+ GF_FREE (sh->buf);
+
+ GF_FREE (sh->parentbufs);
if (sh->inode)
inode_unref (sh->inode);
- if (sh->xattr) {
- afr_reset_xattr (sh->xattr, priv->child_count);
- GF_FREE (sh->xattr);
- }
+ afr_xattr_array_destroy (sh->xattr, priv->child_count);
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ GF_FREE (sh->child_errno);
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->pending_matrix[i]);
- }
- GF_FREE (sh->pending_matrix);
- }
+ afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
+ afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->delta_matrix[i]);
- }
- GF_FREE (sh->delta_matrix);
- }
+ GF_FREE (sh->sources);
- if (sh->sources)
- GF_FREE (sh->sources);
+ GF_FREE (sh->success);
- if (sh->success)
- GF_FREE (sh->success);
+ GF_FREE (sh->locked_nodes);
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
-
- if (sh->healing_fd && !sh->healing_fd_opened) {
+ if (sh->healing_fd) {
fd_unref (sh->healing_fd);
sh->healing_fd = NULL;
}
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
+ GF_FREE ((char *)sh->linkname);
- if (sh->success_children)
- GF_FREE (sh->success_children);
+ GF_FREE (sh->success_children);
- if (sh->fresh_children)
- GF_FREE (sh->fresh_children);
+ GF_FREE (sh->fresh_children);
- if (sh->fresh_parent_dirs)
- GF_FREE (sh->fresh_parent_dirs);
+ GF_FREE (sh->fresh_parent_dirs);
loc_wipe (&sh->parent_loc);
+ loc_wipe (&sh->lookup_loc);
+
+ GF_FREE (sh->checksum);
+
+ GF_FREE (sh->write_needed);
+ if (sh->healing_fd)
+ fd_unref (sh->healing_fd);
}
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
{
- int i = 0;
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- GF_FREE (local->pending[i]);
- }
-
- GF_FREE (local->pending);
-
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
+ afr_matrix_cleanup (local->pending, priv->child_count);
+ afr_matrix_cleanup (local->transaction.txn_changelog,
+ priv->child_count);
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
+ GF_FREE (local->internal_lock.locked_nodes);
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+ afr_entry_lockee_cleanup (&local->internal_lock);
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->child_errno);
+ GF_FREE (local->transaction.pre_op);
+ GF_FREE (local->transaction.eager_lock);
GF_FREE (local->transaction.basename);
GF_FREE (local->transaction.new_basename);
loc_wipe (&local->transaction.parent_loc);
loc_wipe (&local->transaction.new_parent_loc);
+
+ GF_FREE (local->transaction.postop_piggybacked);
}
@@ -733,11 +887,16 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->xattr_req)
dict_unref (local->xattr_req);
- if (local->child_up)
- GF_FREE (local->child_up);
+ if (local->dict)
+ dict_unref (local->dict);
+
+ GF_FREE(local->replies);
- if (local->fresh_children)
- GF_FREE (local->fresh_children);
+ GF_FREE (local->child_up);
+
+ GF_FREE (local->child_errno);
+
+ GF_FREE (local->fresh_children);
{ /* lookup */
if (local->cont.lookup.xattrs) {
@@ -755,27 +914,23 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
inode_unref (local->cont.lookup.inode);
}
- if (local->cont.lookup.postparents)
- GF_FREE (local->cont.lookup.postparents);
+ GF_FREE (local->cont.lookup.postparents);
- if (local->cont.lookup.bufs)
- GF_FREE (local->cont.lookup.bufs);
+ GF_FREE (local->cont.lookup.bufs);
- if (local->cont.lookup.success_children)
- GF_FREE (local->cont.lookup.success_children);
+ GF_FREE (local->cont.lookup.success_children);
- if (local->cont.lookup.sources)
- GF_FREE (local->cont.lookup.sources);
+ GF_FREE (local->cont.lookup.sources);
+ afr_matrix_cleanup (local->cont.lookup.pending_matrix,
+ priv->child_count);
}
{ /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
+ GF_FREE (local->cont.getxattr.name);
}
{ /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
+ GF_FREE (local->cont.lk.locked_nodes);
}
{ /* create */
@@ -809,18 +964,40 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
dict_unref (local->cont.setxattr.dict);
}
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref (local->cont.fsetxattr.dict);
+ }
+
{ /* removexattr */
GF_FREE (local->cont.removexattr.name);
}
-
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref (local->cont.xattrop.xattr);
+ }
+ { /* fxattrop */
+ if (local->cont.fxattrop.xattr)
+ dict_unref (local->cont.fxattrop.xattr);
+ }
{ /* symlink */
GF_FREE (local->cont.symlink.linkpath);
}
{ /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
+ GF_FREE (local->cont.opendir.checksum);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref (local->cont.readdir.dict);
}
+
+ if (local->xdata_req)
+ dict_unref (local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
}
@@ -841,23 +1018,41 @@ afr_frame_return (call_frame_t *frame)
return call_count;
}
-
-/**
- * up_children_count - return the number of children that are up
- */
-
int
-afr_up_children_count (int child_count, unsigned char *child_up)
+afr_set_elem_count_get (unsigned char *elems, int child_count)
{
int i = 0;
int ret = 0;
for (i = 0; i < child_count; i++)
- if (child_up[i])
+ if (elems[i])
ret++;
return ret;
}
+/**
+ * up_children_count - return the number of children that are up
+ */
+
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count)
+{
+ return afr_set_elem_count_get (child_up, child_count);
+}
+
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count)
+{
+ return afr_set_elem_count_get (children, child_count);
+}
+
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count)
+{
+ return afr_set_elem_count_get (pre_op, child_count);
+}
+
gf_boolean_t
afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
{
@@ -885,32 +1080,150 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
uuid_copy (loc->pargfid, postparent->ia_gfid);
}
-void
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source', pick the
+ * maximum quota size among the sources. Otherwise pick the maximum quota size
+ * among all the available copies. This way, if there is a source and
+ * stale copies, it always votes for the 'source'.
+ * */
+
+static void
+afr_handle_quota_size (afr_local_t *local, xlator_t *this,
+ dict_t *rsp_dict)
+{
+ int32_t *sources = NULL;
+ dict_t *xattr = NULL;
+ data_t *max_data = NULL;
+ int64_t max_quota_size = -1;
+ data_t *data = NULL;
+ int64_t *size = NULL;
+ int64_t quota_size = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t source_present = _gf_false;
+
+ priv = this->private;
+ sources = local->cont.lookup.sources;
+
+ if (rsp_dict == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "%s: Invalid "
+ "response dictionary", local->loc.path);
+ return;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source_present = _gf_true;
+ break;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ /*
+ * If there is at least one source, let's check
+ * for maximum quota sizes among sources, otherwise take the
+ * maximum of the ones present to be on the safer side.
+ */
+ if (source_present && !sources[i])
+ continue;
+
+ xattr = local->cont.lookup.xattrs[i];
+ if (!xattr)
+ continue;
+
+ data = dict_get (xattr, QUOTA_SIZE_KEY);
+ if (!data)
+ continue;
+
+ size = (int64_t*)data->data;
+ quota_size = ntoh64(*size);
+ gf_log (this->name, GF_LOG_DEBUG, "%s: %d, size: %"PRId64,
+ local->loc.path, i, quota_size);
+ if (quota_size > max_quota_size) {
+ if (max_data)
+ data_unref (max_data);
+
+ max_quota_size = quota_size;
+ max_data = data_ref (data);
+ }
+ }
+
+ if (max_data) {
+ ret = dict_set (rsp_dict, QUOTA_SIZE_KEY, max_data);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "quota size", local->loc.path);
+ }
+
+ data_unref (max_data);
+ }
+}
+
+int
afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
{
- int32_t read_child = -1;
struct iatt *buf = NULL;
struct iatt *postparent = NULL;
dict_t **xattr = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int ret = 0;
+ int i = 0;
GF_ASSERT (local);
buf = &local->cont.lookup.buf;
postparent = &local->cont.lookup.postparent;
xattr = &local->cont.lookup.xattr;
+ priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
- NULL);
+ local->fresh_children);
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+ success_children = local->cont.lookup.success_children;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ afr_children_intersection_get (local->fresh_children, success_children,
+ sources, priv->child_count);
+ if (!sources[read_child]) {
+ read_child = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ read_child = i;
+ break;
+ }
+ }
+ }
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+
gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
read_child);
- *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
+ if (!*xattr)
+ *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
+
*buf = local->cont.lookup.bufs[read_child];
*postparent = local->cont.lookup.postparents[read_child];
+ if (dict_get (local->xattr_req, QUOTA_SIZE_KEY))
+ afr_handle_quota_size (local, this, *xattr);
+
if (IA_INVAL == local->cont.lookup.inode->ia_type) {
/* fix for RT #602 */
local->cont.lookup.inode->ia_type = buf->ia_type;
}
+out:
+ return ret;
}
static void
@@ -920,6 +1233,7 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
uint32_t inodelk_count = 0;
uint32_t entrylk_count = 0;
int ret = -1;
+ uint32_t parent_entrylk = 0;
GF_ASSERT (local);
GF_ASSERT (this);
@@ -935,99 +1249,164 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
&entrylk_count);
if (ret == 0)
local->entrylk_count += entrylk_count;
+ ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
+ &parent_entrylk);
+ if (!ret)
+ local->cont.lookup.parent_entrylk += parent_entrylk;
}
+/*
+ * It's important to maintain a commutative property on do_*_self_heal and
+ * found*; once set, they must not be cleared by a subsequent iteration or
+ * call, so that they represent a logical OR of all iterations and calls
+ * regardless of child/key order. That allows the caller to call us multiple
+ * times without having to use a separate variable as a "reduce" accumulator.
+ */
static void
-afr_lookup_set_self_heal_data_by_xattr (afr_local_t *local, xlator_t *this,
- dict_t *xattr)
+afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
+ dict_t *xattr)
{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ void *pending_raw = NULL;
+ int32_t *pending = NULL;
+
GF_ASSERT (local);
GF_ASSERT (this);
GF_ASSERT (xattr);
- if (afr_sh_has_metadata_pending (xattr, this)) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
+ priv = this->private;
- if (afr_sh_has_entry_pending (xattr, this)) {
- local->self_heal.need_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xattr, priv->pending_key[i],
+ &pending_raw);
+ if (ret != 0) {
+ continue;
+ }
+ pending = pending_raw;
- if (afr_sh_has_data_pending (xattr, this)) {
- local->self_heal.need_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
+ if (pending[AFR_METADATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "metadata self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_ENTRY_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entry self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_entry_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_DATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "data self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
}
}
+void
+afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
+{
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t subvol_status = 0;
+ int32_t *success_children = NULL;
+ dict_t **xattrs = NULL;
+ struct iatt *bufs = NULL;
+ int32_t **pending_matrix = NULL;
+
+ priv = this->private;
+
+ sources = GF_CALLOC (priv->child_count, sizeof (*sources),
+ gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ success_children = local->cont.lookup.success_children;
+ xattrs = local->cont.lookup.xattrs;
+ bufs = local->cont.lookup.bufs;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ afr_build_sources (this, xattrs, bufs, pending_matrix,
+ sources, success_children, AFR_METADATA_TRANSACTION,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ local->cont.lookup.possible_spb = _gf_true;
+out:
+ GF_FREE (sources);
+}
+
static void
afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
struct iatt *buf, struct iatt *lookup_buf)
{
if (PERMISSION_DIFFERS (buf, lookup_buf)) {
/* mismatching permissions */
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"permissions differ for %s ", local->loc.path);
- local->self_heal.need_metadata_self_heal = _gf_true;
+ local->self_heal.do_metadata_self_heal = _gf_true;
}
if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
/* mismatching permissions */
- local->self_heal.need_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_INFO,
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
"ownership differs for %s ", local->loc.path);
}
if (SIZE_DIFFERS (buf, lookup_buf)
&& IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"size differs for %s ", local->loc.path);
- local->self_heal.need_data_self_heal = _gf_true;
+ local->self_heal.do_data_self_heal = _gf_true;
}
if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
/* mismatching gfid */
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s: gfid different on subvolume", local->loc.path);
}
}
static void
+afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
+{
+ gf_boolean_t split_brain = _gf_false;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
+ if ((local->success_count > 0) && split_brain &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ sh->force_confirm_spb = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
+ }
+}
+
+static void
afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
{
GF_ASSERT (local);
GF_ASSERT (this);
if ((local->success_count > 0) && (local->enoent_count > 0)) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- local->self_heal.need_data_self_heal = _gf_true;
- local->self_heal.need_entry_self_heal = _gf_true;
- local->self_heal.need_gfid_self_heal = _gf_true;
- local->self_heal.need_missing_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ local->self_heal.do_data_self_heal = _gf_true;
+ local->self_heal.do_entry_self_heal = _gf_true;
+ local->self_heal.do_gfid_self_heal = _gf_true;
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_DEBUG,
"entries are missing in lookup of %s.",
local->loc.path);
- //If all self-heals are needed no need to check for other rules
- goto out;
}
- if ((local->success_count > 0) &&
- afr_is_split_brain (this, local->cont.lookup.inode) &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.need_data_self_heal = _gf_true;
- local->self_heal.need_gfid_self_heal = _gf_true;
- local->self_heal.need_missing_entry_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
-
-out:
return;
}
@@ -1037,11 +1416,14 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
GF_ASSERT (sh);
GF_ASSERT (priv);
- return (sh->need_gfid_self_heal
- || sh->need_missing_entry_self_heal
- || (priv->data_self_heal && sh->need_data_self_heal)
- || (priv->metadata_self_heal && sh->need_metadata_self_heal)
- || (priv->entry_self_heal && sh->need_entry_self_heal));
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ return (sh->do_gfid_self_heal
+ || sh->do_missing_entry_self_heal
+ || (afr_data_self_heal_enabled (priv->data_self_heal) &&
+ sh->do_data_self_heal)
+ || (priv->metadata_self_heal && sh->do_metadata_self_heal)
+ || (priv->entry_self_heal && sh->do_entry_self_heal));
}
afr_transaction_type
@@ -1068,14 +1450,13 @@ afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
int ret = -1;
dict_t **xattrs = NULL;
int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ uuid_t *gfid = NULL;
GF_ASSERT (local);
GF_ASSERT (this);
GF_ASSERT (local->success_count > 0);
- bufs = local->cont.lookup.bufs;
success_children = local->cont.lookup.success_children;
/*We can take the success_children[0] only because we already
*handle the conflicting children other wise, we could select the
@@ -1084,8 +1465,9 @@ afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
type = afr_transaction_type_get (ia_type);
xattrs = local->cont.lookup.xattrs;
+ gfid = &local->cont.lookup.buf.ia_gfid;
source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
- type);
+ type, *gfid);
if (source < 0) {
gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
"for %s", local->loc.path);
@@ -1101,29 +1483,32 @@ out:
}
static inline gf_boolean_t
-afr_is_self_heal_running (afr_local_t *local)
+afr_is_transaction_running (afr_local_t *local)
{
- GF_ASSERT (local);
+ GF_ASSERT (local->fop == GF_FOP_LOOKUP);
return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
}
-static void
+void
afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t is_background, ia_type_t ia_type,
+ gf_boolean_t background, ia_type_t ia_type, char *reason,
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
xlator_t *this),
int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno))
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed))
{
afr_local_t *local = NULL;
char sh_type_str[256] = {0,};
+ char *bg = "";
GF_ASSERT (frame);
GF_ASSERT (this);
GF_ASSERT (inode);
+ GF_ASSERT (ia_type != IA_INVAL);
local = frame->local;
- local->self_heal.background = is_background;
+ local->self_heal.background = background;
local->self_heal.type = ia_type;
local->self_heal.unwind = unwind;
local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
@@ -1132,19 +1517,21 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
sh_type_str,
sizeof (sh_type_str));
- gf_log (this->name, GF_LOG_INFO,
- "background %s self-heal triggered. path: %s",
- sh_type_str, local->loc.path);
+ if (background)
+ bg = "background";
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s %s self-heal triggered. path: %s, reason: %s", bg,
+ sh_type_str, local->loc.path, reason);
afr_self_heal (frame, this, inode);
}
-int
+unsigned int
afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
struct iatt *bufs, unsigned int child_count,
const char *path)
{
- int gfid_miss_count = 0;
+ unsigned int gfid_miss_count = 0;
int i = 0;
struct iatt *child1 = NULL;
@@ -1203,7 +1590,7 @@ afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
child2 = &bufs[success_children[i-1]];
if (FILETYPE_DIFFERS (child1, child2)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype "
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: filetype "
"differs on subvolumes (%d, %d)", path,
success_children[i-1], success_children[i]);
conflicting = _gf_true;
@@ -1212,7 +1599,7 @@ afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
if (!gfid || uuid_is_null (child1->ia_gfid))
continue;
if (uuid_compare (*gfid, child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs"
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid differs"
" on subvolume %d", path, success_children[i]);
conflicting = _gf_true;
goto out;
@@ -1266,8 +1653,27 @@ afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
return conflict;
}
+gf_boolean_t
+afr_open_only_data_self_heal (char *data_self_heal)
+{
+ return !strcmp (data_self_heal, "open");
+}
+
+gf_boolean_t
+afr_data_self_heal_enabled (char *data_self_heal)
+{
+ gf_boolean_t enabled = _gf_false;
+
+ if (gf_string2boolean (data_self_heal, &enabled) == -1) {
+ enabled = !strcmp (data_self_heal, "open");
+ GF_ASSERT (enabled);
+ }
+
+ return enabled;
+}
+
static void
-afr_lookup_set_self_heal_data (afr_local_t *local, xlator_t *this)
+afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
{
int i = 0;
struct iatt *bufs = NULL;
@@ -1275,15 +1681,18 @@ afr_lookup_set_self_heal_data (afr_local_t *local, xlator_t *this)
afr_private_t *priv = NULL;
int32_t child1 = -1;
int32_t child2 = -1;
+ afr_self_heal_t *sh = NULL;
priv = this->private;
+ sh = &local->self_heal;
+
afr_detect_self_heal_by_lookup_status (local, this);
if (afr_lookup_gfid_missing_count (local, this))
- local->self_heal.need_gfid_self_heal = _gf_true;
+ local->self_heal.do_gfid_self_heal = _gf_true;
if (_gf_true == afr_lookup_conflicting_entries (local, this))
- local->self_heal.need_missing_entry_self_heal = _gf_true;
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
else
afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
local->cont.lookup.bufs,
@@ -1301,23 +1710,31 @@ afr_lookup_set_self_heal_data (afr_local_t *local, xlator_t *this)
xattr = local->cont.lookup.xattrs;
for (i = 0; i < local->success_count; i++) {
child1 = local->cont.lookup.success_children[i];
- afr_lookup_set_self_heal_data_by_xattr (local, this,
- xattr[child1]);
- }
+ afr_lookup_set_self_heal_params_by_xattr (local, this,
+ xattr[child1]);
+ }
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ sh->do_data_self_heal = _gf_false;
+ if (sh->do_metadata_self_heal)
+ afr_lookup_check_set_metadata_split_brain (local, this);
+ afr_detect_self_heal_by_split_brain_status (local, this);
}
int
afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed)
{
afr_local_t *local = NULL;
+ int ret = -1;
+ dict_t *xattr = NULL;
local = frame->local;
if (op_ret == -1) {
local->op_ret = -1;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local->op_errno = afr_most_important_error(local->op_errno,
+ op_errno, _gf_true);
goto out;
} else {
@@ -1325,6 +1742,23 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
}
afr_lookup_done_success_action (frame, this, _gf_true);
+ xattr = local->cont.lookup.xattr;
+ if (xattr) {
+ ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "sh-failed to %d", local->loc.path, sh_failed);
+
+ if (local->self_heal.actual_sh_started == _gf_true &&
+ sh_failed == 0) {
+ ret = dict_set_int32 (xattr, "actual-sh-done", 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
+ " set actual-sh-done to %d",
+ local->loc.path,
+ local->self_heal.actual_sh_started);
+ }
+ }
out:
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->cont.lookup.inode, &local->cont.lookup.buf,
@@ -1376,33 +1810,36 @@ afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
}
static void
-afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,
- gf_boolean_t *sh_launched)
+afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t *sh_launched)
{
- size_t up_count = 0;
+ unsigned int up_count = 0;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ char *reason = NULL;
GF_ASSERT (sh_launched);
*sh_launched = _gf_false;
priv = this->private;
local = frame->local;
- up_count = afr_up_children_count (priv->child_count, local->child_up);
+ up_count = afr_up_children_count (local->child_up, priv->child_count);
if (up_count == 1) {
gf_log (this->name, GF_LOG_DEBUG,
"Only 1 child up - do not attempt to detect self heal");
goto out;
}
- afr_lookup_set_self_heal_data (local, this);
+ afr_lookup_set_self_heal_params (local, this);
if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_self_heal_running (local))
+ if (afr_is_transaction_running (local) &&
+ (!local->allow_sh_for_running_transaction))
goto out;
+ reason = "lookup detected pending operations";
afr_launch_self_heal (frame, this, local->cont.lookup.inode,
_gf_true, local->cont.lookup.buf.ia_type,
- afr_post_gfid_sh_success,
+ reason, afr_post_gfid_sh_success,
afr_self_heal_lookup_unwind);
*sh_launched = _gf_true;
}
@@ -1457,53 +1894,175 @@ afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
int32_t read_child = -1;
int32_t ret = -1;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ gf_boolean_t fresh_lookup = _gf_false;
local = frame->local;
- priv = this->private;
+ fresh_lookup = local->cont.lookup.fresh_lookup;
if (local->loc.parent == NULL)
fail_conflict = _gf_true;
- if (afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name)) {
- if (fail_conflict == _gf_false) {
+ if (afr_lookup_conflicting_entries (local, this)) {
+ if (fail_conflict == _gf_false)
ret = 0;
- } else {
- local->op_ret = -1;
- local->op_errno = EIO;
- }
goto out;
}
ret = afr_lookup_select_read_child (local, this, &read_child);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
+ if (!afr_is_transaction_running (local) || fresh_lookup) {
+ if (read_child < 0)
+ goto out;
+
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
+ if (ret)
+ goto out;
}
- ret = afr_lookup_set_read_ctx (local, this, read_child);
+ ret = afr_lookup_build_response_params (local, this);
+ if (ret)
+ goto out;
+ afr_update_loc_gfids (&local->loc,
+ &local->cont.lookup.buf,
+ &local->cont.lookup.postparent);
+
+ ret = 0;
+out:
if (ret) {
local->op_ret = -1;
local->op_errno = EIO;
- goto out;
}
+ return ret;
+}
+
+int
+afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+ int lsubvol = -1;
- afr_lookup_build_response_params (local, this);
- if (afr_is_fresh_lookup (&local->loc, this)) {
- afr_update_loc_gfids (&local->loc,
- &local->cont.lookup.buf,
- &local->cont.lookup.postparent);
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if (uuid_is_null (bufs[child].ia_gfid))
+ continue;
+ if (lsubvol < 0) {
+ lsubvol = child;
+ } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
+ lsubvol = child;
+ } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
+ (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
+ lsubvol = child;
+ }
}
+ return lsubvol;
+}
- ret = 0;
+void
+afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
+ int subvol)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ memcpy (local->fresh_children, success_children,
+ sizeof (*success_children) * priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ child = local->fresh_children[i];
+ if (child == -1)
+ break;
+ if (child == subvol)
+ continue;
+ if (uuid_is_null (bufs[child].ia_gfid) &&
+ (bufs[child].ia_type == bufs[subvol].ia_type))
+ continue;
+ afr_children_rm_child (success_children, child,
+ priv->child_count);
+ local->success_count--;
+ }
+ afr_reset_children (local->fresh_children, priv->child_count);
+}
+
+void
+afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
+{
+ int lsubvol = 0;
+
+ if (!afr_lookup_conflicting_entries (local, this))
+ goto out;
+
+ lsubvol = afr_lookup_get_latest_subvol (local, this);
+ if (lsubvol < 0)
+ goto out;
+ afr_lookup_mark_other_entries_stale (local, this, lsubvol);
out:
- return ret;
+ return;
+}
+
+gf_boolean_t
+afr_is_entry_possibly_under_creation (afr_local_t *local, xlator_t *this)
+{
+ /*
+ * We need to perform this test in lookup done and treat ongoing
+ * create/DELETE as ENOENT.
+ * Reason:
+ Multiple clients A, B and C are attempting 'mkdir -p /mnt/a/b/c'
+
+ 1 Client A is in the middle of mkdir(/a). It has acquired lock.
+ It has performed mkdir(/a) on one subvol, and second one is still
+ in progress
+ 2 Client B performs a lookup, sees directory /a on one,
+ ENOENT on the other, succeeds lookup.
+ 3 Client B performs lookup on /a/b on both subvols, both return ENOENT
+ (one subvol because /a/b does not exist, another because /a
+ itself does not exist)
+ 4 Client B proceeds to mkdir /a/b. It obtains entrylk on inode=/a with
+ basename=b on one subvol, but fails on other subvol as /a is yet to
+ be created by Client A.
+ 5 Client A finishes mkdir of /a on other subvol
+ 6 Client C also attempts to create /a/b, lookup returns ENOENT on
+ both subvols.
+ 7 Client C tries to obtain entrylk on inode=/a with basename=b,
+ obtains on one subvol (where B had failed), and waits for B to unlock
+ on other subvol.
+ 8 Client B finishes mkdir() on one subvol with GFID-1 and completes
+ transaction and unlocks
+ 9 Client C gets the lock on the second subvol. At this stage the second
+ subvol already has /a/b created from Client B, but Client C does not
+ check that in the middle of mkdir transaction
+ 10 Client C attempts mkdir /a/b on both subvols. It succeeds on
+ ONLY ONE (where Client B could not get lock because of
+ missing parent /a dir) with GFID-2, and gets EEXIST from ONE subvol.
+ This way we have /a/b in GFID mismatch. One subvol got GFID-1 because
+ Client B performed transaction on only one subvol (because entrylk()
+ could not be obtained on second subvol because of missing parent dir --
+ caused by premature/speculative succeeding of lookup() on /a when locks
+ are detected). Other subvol gets GFID-2 from Client C because while
+ it was waiting for entrylk() on both subvols, Client B was in the
+ middle of creating mkdir() on only one subvol, and Client C does not
+ "expect" this when it is between lock() and pre-op()/op() phase of the
+ transaction.
+ */
+ if (local->cont.lookup.parent_entrylk && local->enoent_count)
+ return _gf_true;
+
+ return _gf_false;
}
+
static void
afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
@@ -1512,6 +2071,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
int ret = -1;
gf_boolean_t sh_launched = _gf_false;
+ gf_boolean_t fail_conflict = _gf_false;
int gfid_miss_count = 0;
int enotconn_count = 0;
int up_children_count = 0;
@@ -1519,11 +2079,21 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
priv = this->private;
local = frame->local;
+ if (afr_is_entry_possibly_under_creation (local, this)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto unwind;
+ }
+
if (local->op_ret < 0)
goto unwind;
+
+ if (local->cont.lookup.parent_entrylk && local->success_count > 1)
+ afr_succeed_lookup_on_latest_iatt (local, this);
+
gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
- up_children_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
enotconn_count = priv->child_count - up_children_count;
if ((gfid_miss_count == local->success_count) &&
(enotconn_count > 0)) {
@@ -1535,12 +2105,23 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
goto unwind;
}
- ret = afr_lookup_done_success_action (frame, this, _gf_false);
+ if ((gfid_miss_count == local->success_count) &&
+ uuid_is_null (local->cont.lookup.gfid_req)) {
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
+ local->loc.path);
+ goto unwind;
+ }
+
+ if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
+ fail_conflict = _gf_true;
+ ret = afr_lookup_done_success_action (frame, this, fail_conflict);
if (ret)
goto unwind;
uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
- afr_lookup_perform_self_heal_if_needed (frame, this, &sh_launched);
+ afr_lookup_perform_self_heal (frame, this, &sh_launched);
if (sh_launched) {
unwind = 0;
goto unwind;
@@ -1561,24 +2142,20 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ESTALE > ENOENT > others
- *
+ * The hierarchy is ESTALE > EIO > ENOENT > others
*/
-
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno)
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio)
{
- gf_boolean_t ret = _gf_true;
-
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
-
- /* Nothing should overwrite ENOENT, except ESTALE */
- else if ((old_errno == ENOENT) && (new_errno != ESTALE))
- ret = _gf_false;
-
- return ret;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (eio && (old_errno == EIO || new_errno == EIO))
+ return EIO;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+
+ return new_errno;
}
int32_t
@@ -1597,8 +2174,9 @@ afr_resultant_errno_get (int32_t *children,
} else {
child = i;
}
- if (afr_error_more_important (op_errno, child_errno[child]))
- op_errno = child_errno[child];
+ op_errno = afr_most_important_error(op_errno,
+ child_errno[child],
+ _gf_false);
}
return op_errno;
}
@@ -1610,8 +2188,8 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
if (op_errno == ENOENT)
local->enoent_count++;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local->op_errno = afr_most_important_error(local->op_errno, op_errno,
+ _gf_false);
if (local->op_errno == ESTALE) {
local->op_ret = -1;
@@ -1625,7 +2203,7 @@ afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
afr_private_t *priv = NULL;
GF_ASSERT (inode);
- if (inode->ino != 1)
+ if (!__is_root_gfid (inode->gfid))
goto out;
if (!afr_is_fresh_lookup (&local->loc, this))
goto out;
@@ -1658,12 +2236,79 @@ afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
afr_set_root_inode_on_first_lookup (local, this, inode);
}
+static int32_t
+afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ priv = this->private;
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ child_index = (int32_t)(long)cookie;
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
+ }
+
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+{
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
+
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
+}
+
static void
afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
+ afr_private_t *priv = this->private;
+
if (local->success_count == 0) {
if (local->op_errno != ESTALE) {
local->op_ret = op_ret;
@@ -1676,6 +2321,11 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind
afr_lookup_cache_args (local, child_index, xattr,
buf, postparent);
+
+ if (local->do_discovery && (priv->read_child == (-1))) {
+ afr_attempt_local_discovery(this,child_index);
+ }
+
local->cont.lookup.success_children[local->success_count] = child_index;
local->success_count++;
}
@@ -1722,6 +2372,8 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
int ret = -ENOMEM;
struct iatt *iatts = NULL;
int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ int32_t **pending_matrix = NULL;
GF_ASSERT (local);
local->cont.lookup.xattrs = GF_CALLOC (child_count,
@@ -1740,15 +2392,25 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
goto out;
local->cont.lookup.bufs = iatts;
- success_children = afr_fresh_children_create (child_count);
+ success_children = afr_children_create (child_count);
if (NULL == success_children)
goto out;
local->cont.lookup.success_children = success_children;
- local->fresh_children = afr_fresh_children_create (child_count);
+ local->fresh_children = afr_children_create (child_count);
if (NULL == local->fresh_children)
goto out;
+ sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ local->cont.lookup.sources = sources;
+
+ pending_matrix = afr_matrix_create (child_count, child_count);
+ if (NULL == pending_matrix)
+ goto out;
+ local->cont.lookup.pending_matrix = pending_matrix;
+
ret = 0;
out:
return ret;
@@ -1766,42 +2428,55 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
int call_count = 0;
uint64_t ctx = 0;
int32_t op_errno = 0;
-
+ int allow_sh = 0;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
local->op_ret = -1;
frame->local = local;
+ local->fop = GF_FOP_LOOKUP;
- if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
+ loc_copy (&local->loc, loc);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0) {
+ op_errno = EINVAL;
goto out;
}
- loc_copy (&local->loc, loc);
+ if (local->loc.path &&
+ (strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR) == 0)) {
+ op_errno = EPERM;
+ ret = -1;
+ goto out;
+ }
- ret = inode_ctx_get (loc->inode, this, &ctx);
+ ret = inode_ctx_get (local->loc.inode, this, &ctx);
if (ret == 0) {
/* lookup is a revalidate */
local->read_child_index = afr_inode_get_read_ctx (this,
- loc->inode,
- NULL);
+ local->loc.inode,
+ NULL);
} else {
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
+ if (priv->hash_mode) {
+ local->read_child_index = -1;
+ }
+ else {
+ local->read_child_index =
+ (++priv->read_child_rr) %
+ (priv->child_count);
+ }
}
UNLOCK (&priv->read_child_lock);
+ local->cont.lookup.fresh_lookup = _gf_true;
}
- if (loc->parent)
- local->cont.lookup.parent_ino = loc->parent->ino;
-
- local->child_up = memdup (priv->child_up, priv->child_count);
+ local->child_up = memdup (priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
if (NULL == local->child_up) {
op_errno = ENOMEM;
goto out;
@@ -1813,10 +2488,9 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
call_count = local->call_count;
-
if (local->call_count == 0) {
ret = -1;
op_errno = ENOTCONN;
@@ -1826,42 +2500,33 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
/* By default assume ENOTCONN. On success it will be set to 0. */
local->op_errno = ENOTCONN;
- if (xattr_req == NULL)
- local->xattr_req = dict_new ();
- else
- local->xattr_req = dict_ref (xattr_req);
+ ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
+ &allow_sh);
+ dict_del (xattr_req, "allow-sh-for-running-transaction");
+ local->allow_sh_for_running_transaction = allow_sh;
- afr_xattr_req_prepare (this, local->xattr_req, loc->path);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_INODELK_COUNT);
- }
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_ENTRYLK_COUNT);
- }
-
- ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req);
+ ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
+ &gfid_req);
if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get the gfid from dict");
- } else {
- uuid_copy (local->cont.lookup.gfid_req, gfid_req);
+ local->op_errno = -ret;
+ goto out;
+ }
+ afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
+ &local->loc);
+ local->fop = GF_FOP_LOOKUP;
+ if (priv->choose_local && !priv->did_discovery) {
+ if (gfid_req && __is_root_gfid(gfid_req)) {
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
}
- if (local->loc.parent != NULL)
- dict_del (xattr_req, "gfid-req");
-
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_lookup_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->lookup,
- loc, local->xattr_req);
+ &local->loc, local->xattr_req);
if (!--call_count)
break;
}
@@ -1869,7 +2534,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
ret = 0;
out:
- if (ret == -1)
+ if (ret)
AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
NULL, NULL, NULL, NULL);
@@ -1880,7 +2545,7 @@ out:
/* {{{ open */
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
afr_private_t * priv = NULL;
int ret = -1;
@@ -1892,203 +2557,167 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)
priv = this->private;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
+ ret = __fd_ctx_get (fd, this, &ctx);
- if (ret == 0)
- goto unlock;
+ if (ret == 0)
+ goto out;
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->opened_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
+ fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_acquired) {
+ ret = -ENOMEM;
+ goto out;
+ }
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx (%p)", fd);
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
- INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->locked_on) {
+ ret = -ENOMEM;
+ goto out;
}
-unlock:
- UNLOCK (&fd->lock);
+
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
+ INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
+
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx (%p)", fd);
out:
return ret;
}
-/* {{{ flush */
int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ int ret = -1;
- LOCK (&frame->lock);
+ LOCK (&fd->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno);
+ ret = __afr_fd_ctx_set (this, fd);
}
+ UNLOCK (&fd->lock);
- return 0;
+ return ret;
}
+/* {{{ flush */
int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
if (local->success_count == 0) {
local->op_ret = op_ret;
}
local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
}
local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
- if (need_unwind)
- afr_flush_unwind (frame, this);
+ call_count = afr_frame_return (frame);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret,
+ local->op_errno, NULL);
return 0;
}
-
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
- local = frame->local;
priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ local = frame->local;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- local->fd);
-
+ local->fd, NULL);
if (!--call_count)
break;
+
}
}
return 0;
}
-
-int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
- int call_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -2096,47 +2725,27 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ ret = afr_local_init(local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ local->fd = fd_ref(fd);
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub) {
+ ret = -1;
op_errno = ENOMEM;
goto out;
}
- transaction_frame->local = local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
-
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
+ afr_delayed_changelog_wake_resume (this, fd, stub);
+ ret = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
-
-
- op_ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -2158,17 +2767,18 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ GF_FREE (fd_ctx->pre_op_done);
+
+ GF_FREE (fd_ctx->opened_on);
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ GF_FREE (fd_ctx->locked_on);
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_piggyback);
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_acquired);
+
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
GF_FREE (fd_ctx);
}
@@ -2206,14 +2816,25 @@ afr_release (xlator_t *this, fd_t *fd)
/* {{{ fsync */
int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
int child_index = (long) cookie;
int read_child = 0;
+ call_stub_t *stub = NULL;
local = frame->local;
@@ -2229,13 +2850,13 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = 0;
if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
}
if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
}
local->success_count++;
@@ -2248,9 +2869,32 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf);
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ xdata);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
}
return 0;
@@ -2259,14 +2903,13 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+ int32_t datasync, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2275,19 +2918,20 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
local->fd = fd_ref (fd);
- local->cont.fsync.ino = fd->inode->ino;
+
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -2295,17 +2939,16 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fsync,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -2315,7 +2958,8 @@ out:
int32_t
afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2335,7 +2979,7 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2343,14 +2987,13 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+ int32_t datasync, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2359,33 +3002,30 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fsyncdir_cbk,
priv->children[i],
priv->children[i]->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
@@ -2396,7 +3036,7 @@ out:
int32_t
afr_xattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2405,8 +3045,11 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.xattrop.xattr)
+ local->cont.xattrop.xattr = dict_ref (xattr);
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -2416,7 +3059,7 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.xattrop.xattr, xdata);
return 0;
}
@@ -2424,14 +3067,13 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2440,33 +3082,30 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_xattrop_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
- loc, optype, xattr);
+ loc, optype, xattr, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2477,7 +3116,7 @@ out:
int32_t
afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2487,8 +3126,12 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.fxattrop.xattr)
+ local->cont.fxattrop.xattr = dict_ref (xattr);
+
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -2498,7 +3141,7 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.fxattrop.xattr, xdata);
return 0;
}
@@ -2506,14 +3149,13 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2522,33 +3164,30 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fxattrop_cbk,
priv->children[i],
priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
+ fd, optype, xattr, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2557,7 +3196,7 @@ out:
int32_t
afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2578,7 +3217,7 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2586,14 +3225,14 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2602,41 +3241,39 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_inodelk_cbk,
priv->children[i],
priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2657,7 +3294,7 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2665,14 +3302,14 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2681,42 +3318,38 @@ afr_finodelk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_finodelk_cbk,
priv->children[i],
priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2736,7 +3369,7 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2745,14 +3378,14 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2761,34 +3394,31 @@ afr_entrylk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_entrylk_cbk,
priv->children[i],
priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -2796,7 +3426,7 @@ out:
int32_t
afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2817,7 +3447,7 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2826,14 +3456,14 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2842,41 +3472,38 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fentrylk_cbk,
priv->children[i],
priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
afr_statfs_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
+ struct statvfs *statvfs, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = 0;
@@ -2907,7 +3534,7 @@ afr_statfs_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
+ &local->cont.statfs.buf, xdata);
return 0;
}
@@ -2915,7 +3542,7 @@ afr_statfs_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
afr_private_t * priv = NULL;
int child_count = 0;
@@ -2923,7 +3550,6 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
int i = 0;
int ret = -1;
int call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (this, out);
@@ -2933,15 +3559,13 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
priv = this->private;
child_count = priv->child_count;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- frame->local = local;
call_count = local->call_count;
for (i = 0; i < child_count; i++) {
@@ -2949,24 +3573,24 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_statfs_cbk,
priv->children[i],
priv->children[i]->fops->statfs,
- loc);
+ loc, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
int call_count = -1;
@@ -2976,7 +3600,7 @@ afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0)
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock);
+ lock, xdata);
return 0;
}
@@ -2998,7 +3622,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
if (call_count == 0) {
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
return 0;
}
@@ -3012,7 +3636,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->lk,
local->fd, F_SETLK,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, NULL);
if (!--call_count)
break;
@@ -3025,7 +3649,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
int32_t
afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -3060,12 +3684,12 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index],
priv->children[child_index]->fops->lk,
local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, xdata);
} else if (local->op_ret == -1) {
/* all nodes have gone down */
AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
} else {
/* locking has succeeded on all nodes that are up */
@@ -3083,7 +3707,7 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
}
return 0;
@@ -3092,13 +3716,13 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int i = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -3106,10 +3730,12 @@ afr_lk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
sizeof (*local->cont.lk.locked_nodes),
@@ -3128,13 +3754,12 @@ afr_lk (call_frame_t *frame, xlator_t *this,
STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
priv->children[i],
priv->children[i]->fops->lk,
- fd, cmd, flock);
+ fd, cmd, flock, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -3150,8 +3775,7 @@ afr_forget (xlator_t *this, inode_t *inode)
goto out;
ctx = (afr_inode_ctx_t *)(long)ctx_addr;
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx->fresh_children);
GF_FREE (ctx);
out:
return 0;
@@ -3172,41 +3796,23 @@ afr_priv_dump (xlator_t *this)
GF_ASSERT (priv);
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_build_key(key, key_prefix, "child_count");
- gf_proc_dump_write(key, "%u", priv->child_count);
- gf_proc_dump_build_key(key, key_prefix, "read_child_rr");
- gf_proc_dump_write(key, "%u", priv->read_child_rr);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
for (i = 0; i < priv->child_count; i++) {
- gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i);
+ sprintf (key, "child_up[%d]", i);
gf_proc_dump_write(key, "%d", priv->child_up[i]);
- gf_proc_dump_build_key(key, key_prefix,
- "pending_key[%d]", i);
+ sprintf (key, "pending_key[%d]", i);
gf_proc_dump_write(key, "%s", priv->pending_key[i]);
}
- gf_proc_dump_build_key(key, key_prefix, "data_self_heal");
- gf_proc_dump_write(key, "%d", priv->data_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal");
- gf_proc_dump_write(key, "%d", priv->metadata_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "entry_self_heal");
- gf_proc_dump_write(key, "%d", priv->entry_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "data_change_log");
- gf_proc_dump_write(key, "%d", priv->data_change_log);
- gf_proc_dump_build_key(key, key_prefix, "metadata_change_log");
- gf_proc_dump_write(key, "%d", priv->metadata_change_log);
- gf_proc_dump_build_key(key, key_prefix, "entry_change_log");
- gf_proc_dump_write(key, "%d", priv->entry_change_log);
- gf_proc_dump_build_key(key, key_prefix, "read_child");
- gf_proc_dump_write(key, "%d", priv->read_child);
- gf_proc_dump_build_key(key, key_prefix, "favorite_child");
- gf_proc_dump_write(key, "%d", priv->favorite_child);
- gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->data_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->entry_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "wait_count");
- gf_proc_dump_write(key, "%u", priv->wait_count);
+ gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
+ gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
+ gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
return 0;
}
@@ -3236,24 +3842,35 @@ find_child_index (xlator_t *this, xlator_t *child)
int32_t
afr_notify (xlator_t *this, int32_t event,
- void *data, ...)
+ void *data, void *data2)
{
afr_private_t *priv = NULL;
int i = -1;
int up_children = 0;
int down_children = 0;
int propagate = 0;
-
int had_heard_from_all = 0;
int have_heard_from_all = 0;
int idx = -1;
int ret = -1;
+ int call_psh = 0;
+ int up_child = AFR_ALL_CHILDREN;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
priv = this->private;
if (!priv)
return 0;
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
had_heard_from_all = 1;
for (i = 0; i < priv->child_count; i++) {
if (!priv->last_event[i]) {
@@ -3264,7 +3881,7 @@ afr_notify (xlator_t *this, int32_t event,
/* parent xlators dont need to know about every child_up, child_down
* because of afr ha. If all subvolumes go down, child_down has
* to be triggered. In that state when 1 subvolume comes up child_up
- * needs to be triggered. dht optimises revalidate lookup by sending
+ * needs to be triggered. dht optimizes revalidate lookup by sending
* it only to one of its subvolumes. When child up/down happens
* for afr's subvolumes dht should be notified by child_modified. The
* subsequent revalidate lookup happens on all the dht's subvolumes
@@ -3281,9 +3898,19 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_CHILD_UP:
LOCK (&priv->lock);
{
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->up_count++;
+ }
priv->child_up[idx] = 1;
- priv->up_count++;
+ call_psh = 1;
+ up_child = idx;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 1)
up_children++;
@@ -3304,8 +3931,22 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_CHILD_DOWN:
LOCK (&priv->lock);
{
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between that
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->down_count++;
+ }
priv->child_up[idx] = 0;
- priv->down_count++;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 0)
@@ -3330,7 +3971,16 @@ afr_notify (xlator_t *this, int32_t event,
priv->last_event[idx] = event;
}
UNLOCK (&priv->lock);
+
break;
+
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ goto out;
+ break;
+
default:
propagate = 1;
break;
@@ -3357,6 +4007,8 @@ afr_notify (xlator_t *this, int32_t event,
LOCK (&priv->lock);
{
+ up_children = afr_up_children_count (priv->child_up,
+ priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
event = GF_EVENT_CHILD_UP;
@@ -3376,6 +4028,8 @@ afr_notify (xlator_t *this, int32_t event,
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
+ if (call_psh && priv->shd.iamshd)
+ afr_proactive_self_heal ((void*) (long) up_child);
out:
return ret;
@@ -3400,29 +4054,56 @@ afr_first_up_child (unsigned char *child_up, size_t child_count)
}
int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
{
+ int ret = -1;
+
local->op_ret = -1;
local->op_errno = EUCLEAN;
- local->call_count = afr_up_children_count (priv->child_count,
- priv->child_up);
- if (local->call_count == 0) {
- gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
- return -ENOTCONN;
- }
-
- local->child_up = GF_CALLOC (sizeof (*local->child_up),
- priv->child_count,
+ local->child_up = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_up),
gf_afr_mt_char);
if (!local->child_up) {
- return -ENOMEM;
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
}
memcpy (local->child_up, priv->child_up,
sizeof (*local->child_up) * priv->child_count);
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ if (local->call_count == 0) {
+ gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
- return 0;
+ local->child_errno = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_errno),
+ gf_afr_mt_int32_t);
+ if (!local->child_errno) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->transaction.postop_piggybacked = GF_CALLOC (priv->child_count,
+ sizeof (int),
+ gf_afr_mt_int32_t);
+ if (!local->transaction.postop_piggybacked) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->append_write = _gf_false;
+
+ ret = 0;
+out:
+ return ret;
}
int
@@ -3431,16 +4112,6 @@ afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
{
int ret = -ENOMEM;
- lk->inode_locked_nodes = GF_CALLOC (sizeof (*lk->inode_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->inode_locked_nodes)
- goto out;
-
- lk->entry_locked_nodes = GF_CALLOC (sizeof (*lk->entry_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->entry_locked_nodes)
- goto out;
-
lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
child_count, gf_afr_mt_char);
if (NULL == lk->locked_nodes)
@@ -3460,56 +4131,118 @@ out:
return ret;
}
+/*
+ * Free a row-pointer matrix: each of the first @m row arrays is released,
+ * followed by the array of row pointers itself.  Nothing is done when
+ * @matrix is NULL.
+ */
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+        int row = 0;
+
+        if (matrix) {
+                for (row = 0; row < m; row++)
+                        GF_FREE (matrix[row]);
+
+                GF_FREE (matrix);
+        }
+}
+
+/*
+ * Allocate an @m x @n matrix of int32_t as an array of @m row pointers,
+ * each pointing at a separately allocated row of @n elements.
+ *
+ * Returns the matrix on success.  If any allocation fails, whatever was
+ * allocated so far is released through afr_matrix_cleanup() and NULL is
+ * returned.
+ */
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+        int32_t      **matrix = NULL;
+        unsigned int   i      = 0;
+
+        /* calloc-style (count, element size) argument order */
+        matrix = GF_CALLOC (m, sizeof (*matrix), gf_afr_mt_int32_t);
+        if (!matrix)
+                goto out;
+
+        for (i = 0; i < m; i++) {
+                matrix[i] = GF_CALLOC (n, sizeof (*matrix[i]),
+                                       gf_afr_mt_int32_t);
+                if (!matrix[i])
+                        goto out;
+        }
+        return matrix;
+out:
+        /*
+         * NOTE(review): relies on GF_CALLOC zero-filling the row-pointer
+         * array and on GF_FREE accepting NULL, so that cleanup may walk
+         * all m slots after a partial failure -- confirm.
+         */
+        afr_matrix_cleanup (matrix, m);
+        return NULL;
+}
+
+/*
+ * Initialise one inodelk descriptor: record the lock domain and allocate
+ * the per-child locked_nodes array (@child_count entries).
+ *
+ * @dom is stored by pointer, not copied.
+ *
+ * Returns 0 on success, -ENOMEM if the array cannot be allocated.
+ */
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+        lk->domain = dom;
+
+        /* calloc-style (count, element size) argument order */
+        lk->locked_nodes = GF_CALLOC (child_count,
+                                      sizeof (*lk->locked_nodes),
+                                      gf_afr_mt_char);
+        if (NULL == lk->locked_nodes)
+                return -ENOMEM;
+
+        return 0;
+}
+
int
-afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
+afr_transaction_local_init (afr_local_t *local, xlator_t *this)
{
- int i;
- int child_up_count = 0;
- int ret = -ENOMEM;
+ int child_up_count = 0;
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+ priv = this->private;
ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
AFR_TRANSACTION_LK);
if (ret < 0)
goto out;
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
ret = -ENOMEM;
- child_up_count = afr_up_children_count (priv->child_count, local->child_up);
+ child_up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
if (priv->optimistic_change_log && child_up_count == priv->child_count)
local->optimistic_change_log = 1;
local->first_up_child = afr_first_up_child (local->child_up,
priv->child_count);
- local->child_errno = GF_CALLOC (sizeof (*local->child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!local->child_errno)
+ local->transaction.eager_lock =
+ GF_CALLOC (sizeof (*local->transaction.eager_lock),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+
+ if (!local->transaction.eager_lock)
goto out;
- local->pending = GF_CALLOC (sizeof (*local->pending),
- priv->child_count,
- gf_afr_mt_int32_t);
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out;
- if (!local->pending)
+ local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op)
goto out;
- local->fresh_children = afr_fresh_children_create (priv->child_count);
- if (!local->fresh_children)
+ local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
goto out;
- for (i = 0; i < priv->child_count; i++) {
- local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),
- 3, /* data + metadata + entry */
- gf_afr_mt_int32_t);
- if (!local->pending[i])
- goto out;
- }
+ local->transaction.txn_changelog = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->transaction.txn_changelog)
+ goto out;
- local->transaction.child_errno =
- GF_CALLOC (sizeof (*local->transaction.child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- local->transaction.erase_pending = 1;
+ INIT_LIST_HEAD (&local->transaction.eager_locked);
ret = 0;
out:
@@ -3525,54 +4258,334 @@ afr_reset_children (int32_t *fresh_children, int32_t child_count)
}
int32_t*
-afr_fresh_children_create (int32_t child_count)
+afr_children_create (int32_t child_count)
{
- int32_t *fresh_children = NULL;
+ int32_t *children = NULL;
int i = 0;
GF_ASSERT (child_count > 0);
- fresh_children = GF_CALLOC (child_count, sizeof (*fresh_children),
- gf_afr_mt_int32_t);
- if (NULL == fresh_children)
+ children = GF_CALLOC (child_count, sizeof (*children),
+ gf_afr_mt_int32_t);
+ if (NULL == children)
goto out;
for (i = 0; i < child_count; i++)
- fresh_children[i] = -1;
+ children[i] = -1;
out:
- return fresh_children;
+ return children;
}
void
-afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
- int32_t child_count)
+afr_children_add_child (int32_t *children, int32_t child,
+ int32_t child_count)
{
gf_boolean_t child_found = _gf_false;
int i = 0;
for (i = 0; i < child_count; i++) {
- if (fresh_children[i] == -1)
+ if (children[i] == -1)
break;
- if (fresh_children[i] == child) {
+ if (children[i] == child) {
child_found = _gf_true;
break;
}
}
+
if (!child_found) {
GF_ASSERT (i < child_count);
- fresh_children[i] = child;
+ children[i] = child;
+ }
+}
+
+/*
+ * Remove @child from a children list whose used entries are packed at the
+ * front and terminated by -1.  The entries after the removed one are
+ * shifted down by one slot and the final slot is reset to -1.  The list is
+ * left untouched if @child is not present before the first -1.
+ */
+void
+afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+{
+        int pos  = 0;
+        int last = child_count - 1;
+
+        GF_ASSERT ((child >= 0) && (child < child_count));
+        for (pos = 0; pos < child_count && children[pos] != -1; pos++) {
+                if (children[pos] != child)
+                        continue;
+
+                /* close the gap unless the match is already the last slot */
+                if (pos != last)
+                        memmove (&children[pos], &children[pos + 1],
+                                 sizeof (*children) * (last - pos));
+                children[last] = -1;
+                break;
        }
}
int
-afr_get_children_count (int32_t *fresh_children, unsigned int child_count)
+afr_get_children_count (int32_t *children, unsigned int child_count)
{
int count = 0;
int i = 0;
for (i = 0; i < child_count; i++) {
- if (fresh_children[i] == -1)
+ if (children[i] == -1)
break;
count++;
}
return count;
}
+
+/*
+ * Stamp the frame's root call stack with LOW_PRIO_PROC_PID as its pid.
+ * NOTE(review): presumably other layers key scheduling priority off this
+ * pid value -- confirm against the consumers of LOW_PRIO_PROC_PID.
+ */
+void
+afr_set_low_priority (call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
+
+/*
+ * Mark @fd as opened on subvolume index @child in AFR's fd context.
+ *
+ * The context is set up via afr_fd_ctx_set() and then fetched back with
+ * fd_ctx_get().  For anything other than a directory the open @flags are
+ * stored in the context as well.
+ *
+ * Returns 0 on success, or the negative value returned by whichever of
+ * the two context calls failed (an error is logged in that case).
+ */
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+                      int flags)
+{
+        uint64_t      ctx_val = 0;
+        afr_fd_ctx_t *afr_ctx = NULL;
+        int           rc      = 0;
+
+        GF_ASSERT (fd && fd->inode);
+
+        rc = afr_fd_ctx_set (this, fd);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not set fd ctx for fd=%p", fd);
+                return rc;
+        }
+
+        rc = fd_ctx_get (fd, this, &ctx_val);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not get fd ctx for fd=%p", fd);
+                return rc;
+        }
+
+        afr_ctx = (afr_fd_ctx_t *)(long) ctx_val;
+        afr_ctx->opened_on[child] = AFR_FD_OPENED;
+        if (!IA_ISDIR (fd->inode->ia_type))
+                afr_ctx->flags = flags;
+
+        return 0;
+}
+
+/*
+ * Decide whether quorum is currently met, based on priv->up_count,
+ * priv->down_count and priv->quorum_count.
+ *
+ *  - Fixed quorum (quorum_count != AFR_QUORUM_AUTO): met when
+ *    up_count >= down_count + quorum_count.
+ *  - Auto quorum: met when up_count >= down_count + (child_count/2 + 1).
+ *  - Auto quorum with an even child_count: also met when
+ *    up_count >= down_count + child_count/2 and the first child is up,
+ *    so that only one such half-sized group can ever qualify.
+ *
+ * Returns _gf_false when @priv fails validation.
+ */
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
+{
+        unsigned int needed = 0;
+        gf_boolean_t met    = _gf_false;
+
+        GF_VALIDATE_OR_GOTO(logname,priv,out);
+
+        needed = priv->quorum_count;
+        if (needed != AFR_QUORUM_AUTO) {
+                return (priv->up_count >= (priv->down_count + needed));
+        }
+
+        /* strict majority */
+        needed = priv->child_count / 2 + 1;
+        if (priv->up_count >= (priv->down_count + needed)) {
+                met = _gf_true;
+                goto out;
+        }
+
+        /* exactly half counts only if it contains the first child */
+        if ((priv->child_count % 2) == 0) {
+                needed = priv->child_count / 2;
+                if ((priv->up_count >= (priv->down_count + needed)) &&
+                    priv->child_up[0])
+                        met = _gf_true;
+        }
+
+out:
+        return met;
+}
+
+/*
+ * Release everything owned by an afr_private_t and then the structure
+ * itself: the root inode reference, the self-heal daemon (shd) arrays and
+ * event histories, the per-child bookkeeping arrays, and the locks.
+ * A NULL @priv is a no-op.
+ *
+ * The per-child arrays (shd.statistics, pending_key) are only walked when
+ * the array itself was allocated, so this is safe to call on a partially
+ * initialised @priv.
+ */
+void
+afr_priv_destroy (afr_private_t *priv)
+{
+        int i = 0;
+
+        if (!priv)
+                goto out;
+        inode_unref (priv->root_inode);
+        GF_FREE (priv->shd.pos);
+        GF_FREE (priv->shd.pending);
+        GF_FREE (priv->shd.inprogress);
+        /*
+         * NOTE: the individual shd.timer[i] entries are not cancelled
+         * here (that would need the xlator's ctx, which this function is
+         * not given); only the array holding them is freed.
+         */
+        GF_FREE (priv->shd.timer);
+
+        if (priv->shd.healed)
+                eh_destroy (priv->shd.healed);
+
+        if (priv->shd.heal_failed)
+                eh_destroy (priv->shd.heal_failed);
+
+        if (priv->shd.split_brain)
+                eh_destroy (priv->shd.split_brain);
+
+        /* the array may never have been allocated; do not index NULL */
+        if (priv->shd.statistics) {
+                for (i = 0; i < priv->child_count; i++) {
+                        if (priv->shd.statistics[i])
+                                eh_destroy (priv->shd.statistics[i]);
+                }
+        }
+
+        GF_FREE (priv->shd.statistics);
+
+        GF_FREE (priv->shd.crawl_events);
+
+        GF_FREE (priv->last_event);
+        if (priv->pending_key) {
+                for (i = 0; i < priv->child_count; i++)
+                        GF_FREE (priv->pending_key[i]);
+        }
+        GF_FREE (priv->pending_key);
+        GF_FREE (priv->children);
+        GF_FREE (priv->child_up);
+        LOCK_DESTROY (&priv->lock);
+        LOCK_DESTROY (&priv->read_child_lock);
+        pthread_mutex_destroy (&priv->mutex);
+        GF_FREE (priv);
+out:
+        return;
+}
+
+/*
+ * Return the number of entries on @this->children by walking the list
+ * through its next pointers.
+ */
+int
+xlator_subvolume_count (xlator_t *this)
+{
+        xlator_list_t *node  = this->children;
+        int            count = 0;
+
+        while (node) {
+                count++;
+                node = node->next;
+        }
+
+        return count;
+}
+
+/*
+ * Predicate: _gf_true when a non-zero errno is recorded for @child in
+ * @child_errno, _gf_false otherwise.
+ *
+ * Deliberately not declared `inline`: a plain `inline` definition with
+ * external linkage does not provide an external definition under C99/C11
+ * rules, which breaks linking when the function is called out of line or
+ * passed by address.
+ */
+gf_boolean_t
+afr_is_errno_set (int *child_errno, int child)
+{
+        /* normalise: hand back a boolean, not the raw errno value */
+        return (child_errno[child] != 0) ? _gf_true : _gf_false;
+}
+
+/*
+ * Predicate: logical negation of afr_is_errno_set() -- true when no errno
+ * is recorded for @child in @child_errno.
+ *
+ * Deliberately not declared `inline`: a plain `inline` definition with
+ * external linkage does not provide an external definition under C99/C11
+ * rules, which breaks linking when the function is called out of line or
+ * passed by address.
+ */
+gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child)
+{
+        return !afr_is_errno_set (child_errno, child);
+}
+
+/*
+ * Fill the pending matrix for a newly created entry.
+ *
+ * For every child index i in [0, child_count) for which
+ * is_pending (ctx, i) is true, the metadata slot of pending[i] is set to
+ * hton32 (1).  A second slot is set the same way depending on the file
+ * type in @buf: the entry slot for directories, the data slot for regular
+ * files, and none for any other type.
+ */
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+                                      gf_boolean_t (*is_pending) (int *, int),
+                                      int *ctx, struct iatt *buf,
+                                      unsigned int child_count)
+{
+        int meta_idx = 0;
+        int type_idx = -1;
+        int child    = 0;
+
+        meta_idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+        if (IA_ISDIR (buf->ia_type))
+                type_idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+        else if (IA_ISREG (buf->ia_type))
+                type_idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+
+        for (child = 0; child < child_count; child++) {
+                if (!is_pending (ctx, child))
+                        continue;
+
+                pending[child][meta_idx] = hton32 (1);
+                if (type_idx != -1)
+                        pending[child][type_idx] = hton32 (1);
+        }
+}
+
+/*
+ * Return _gf_true only for an fd that has an inode, is not anonymous and
+ * whose inode carries a non-null gfid; _gf_false in every other case.
+ */
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
+{
+        /* checks short-circuit in order, so later ones never see NULL */
+        if (!fd || !fd->inode ||
+            fd_is_anonymous (fd) ||
+            uuid_is_null (fd->inode->gfid))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/*
+ * Copy local->open_fd_count from the frame's afr_local_t into the AFR
+ * inode context of the inode the operation targeted (the fd's inode when
+ * the local carries an fd, otherwise the loc's inode).  The update is made
+ * under the inode's lock.  Does nothing when no inode is available.
+ */
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = NULL;
+        inode_t         *inode = NULL;
+        afr_inode_ctx_t *ctx   = NULL;
+
+        local = frame->local;
+
+        if (local->fd)
+                inode = local->fd->inode;
+        else
+                inode = local->loc.inode;
+
+        if (!inode)
+                return;
+
+        LOCK (&inode->lock);
+        {
+                ctx = __afr_inode_ctx_get (inode, this);
+                /*
+                 * NOTE(review): __afr_inode_ctx_get() is not visible here;
+                 * guard against a NULL result (e.g. allocation failure)
+                 * rather than dereferencing it while holding the lock.
+                 */
+                if (ctx)
+                        ctx->open_fd_count = local->open_fd_count;
+        }
+        UNLOCK (&inode->lock);
+}
+
+/*
+ * Set up the self-heal daemon statistics held in this->private:
+ *   - shd.statistics: one event history (eh_new) per child, each sized
+ *     AFR_STATISTICS_HISTORY_SIZE;
+ *   - shd.crawl_events: an array with one slot per child.
+ *
+ * Returns 0 on success, -1 as soon as any allocation fails.  Nothing
+ * allocated before the failure is released here.
+ */
+int
+afr_initialise_statistics (xlator_t *this)
+{
+        afr_private_t       *priv         = this->private;
+        shd_crawl_event_t ***crawl_events = NULL;
+        eh_t                *history      = NULL;
+        int                  nbricks      = 0;
+        int                  brick        = 0;
+
+        priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+                                          gf_common_mt_eh_t);
+        if (!priv->shd.statistics)
+                return -1;
+
+        nbricks = priv->child_count;
+        for (brick = 0; brick < nbricks; brick++) {
+                history = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+                                  _gf_false,
+                                  _destroy_crawl_event_data);
+                if (!history)
+                        return -1;
+
+                priv->shd.statistics[brick] = history;
+        }
+
+        /* the crawl_events field is written through a cast pointer */
+        crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events);
+        *crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*),
+                                   priv->child_count,
+                                   gf_afr_mt_shd_crawl_event_t);
+        if (!priv->shd.crawl_events)
+                return -1;
+
+        return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 0fac7324c..689dd84e6 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -51,7 +42,7 @@
int
afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, int32_t sh_failed)
{
afr_local_t *local = NULL;
@@ -60,7 +51,7 @@ afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
afr_set_opendir_done (this, local->fd->inode);
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
return 0;
}
@@ -99,22 +90,24 @@ __checksums_differ (uint32_t *checksum, int child_count,
int32_t
afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
afr_self_heal_t * sh = NULL;
gf_dirent_t * entry = NULL;
gf_dirent_t * tmp = NULL;
+ char *reason = NULL;
int child_index = 0;
uint32_t entry_cksum = 0;
int call_count = 0;
off_t last_offset = 0;
- char sh_type_str[256] = {0,};
+ inode_t *inode = NULL;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
+ inode = local->fd->inode;
child_index = (long) cookie;
@@ -135,7 +128,7 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
}
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum (entry->d_name,
+ entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
strlen (entry->d_name));
local->cont.opendir.checksum[child_index] ^= entry_cksum;
}
@@ -150,7 +143,7 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset);
+ local->fd, 131072, last_offset, NULL);
return 0;
@@ -162,27 +155,18 @@ out:
priv->child_count,
local->child_up)) {
- sh->need_entry_self_heal = _gf_true;
+ sh->do_entry_self_heal = _gf_true;
sh->forced_merge = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_examine_dir_sh_unwind;
-
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
- gf_log (this->name, GF_LOG_INFO,
- "%s self-heal triggered. path: %s, "
- "reason: checksums of directory differ,"
- " forced merge option set",
- sh_type_str, local->loc.path);
-
- afr_self_heal (frame, this, local->fd->inode);
+
+ reason = "checksums of directory differ";
+ afr_launch_self_heal (frame, this, inode, _gf_false,
+ inode->ia_type, reason, NULL,
+ afr_examine_dir_sh_unwind);
} else {
- afr_set_opendir_done (this, local->fd->inode);
+ afr_set_opendir_done (this, inode);
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
}
}
@@ -205,7 +189,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
sizeof (*local->cont.opendir.checksum),
gf_afr_mt_int32_t);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
@@ -215,7 +199,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->readdir,
- local->fd, 131072, 0);
+ local->fd, 131072, 0, NULL);
if (!--call_count)
break;
@@ -229,27 +213,37 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
int32_t
afr_opendir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int32_t up_children_count = 0;
int ret = -1;
int call_count = -1;
+ int32_t child_index = 0;
priv = this->private;
local = frame->local;
+ child_index = (long) cookie;
- up_children_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
LOCK (&frame->lock);
{
- if (op_ret >= 0)
+ if (op_ret >= 0) {
local->op_ret = op_ret;
+ ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ }
local->op_errno = op_errno;
}
+unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
@@ -258,17 +252,8 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
if (local->op_ret != 0)
goto out;
- ret = afr_fd_ctx_set (this, local->fd);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set fd ctx for fd %p",
- local->fd);
- goto out;
- }
if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
+ up_children_count > 1 && priv->entry_self_heal) {
/*
* This is the first opendir on this inode. We need
@@ -277,7 +262,7 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
* to regular entry self-heal because the readdir
* call is sent only to the first subvolume, and
* thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anamolies).
+ * otherwise (assuming changelog shows no anomalies).
*/
gf_log (this->name, GF_LOG_TRACE,
@@ -296,7 +281,7 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
out:
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
return 0;
}
@@ -312,7 +297,6 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
int i = 0;
int ret = -1;
int call_count = -1;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -323,37 +307,36 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
child_count = priv->child_count;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
loc_copy (&local->loc, loc);
- frame->local = local;
local->fd = fd_ref (fd);
call_count = local->call_count;
for (i = 0; i < child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND (frame, afr_opendir_cbk,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd);
+ STACK_WIND_COOKIE (frame, afr_opendir_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ loc, fd, NULL);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
return 0;
}
@@ -375,85 +358,6 @@ struct entry_name {
struct list_head list;
};
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
-{
- struct entry_name *e = NULL;
- gf_boolean_t ret = _gf_false;
-
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
-
-out:
- return ret;
-}
-
-
-static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
-{
- struct entry_name *n = NULL;
- gf_dirent_t *entry = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
-
- list_add (&n->list, &fd_ctx->entries);
- }
-}
-
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
-{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
-
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
- }
-
- return offset;
-}
-
-
static void
afr_forget_entries (fd_t *fd)
{
@@ -479,178 +383,70 @@ afr_forget_entries (fd_t *fd)
}
}
-
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+static void
+afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
gf_dirent_t * entry = NULL;
gf_dirent_t * tmp = NULL;
- int child_index = -1;
-
- priv = this->private;
- local = frame->local;
- child_index = (long) cookie;
-
- if (op_ret == -1)
- goto out;
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
list_del_init (&entry->list);
GF_FREE (entry);
}
}
-
-out:
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
-
- return 0;
}
-
int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+afr_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int32_t next_call_child = -1;
- int ret = 0;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- int32_t *last_index = NULL;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
- int32_t call_child = -1;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
+ if (op_ret == -1)
+ goto out;
local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
- read_child = (long) cookie;
- last_index = &local->cont.readdir.last_index;
- fresh_children = local->fresh_children;
-
- /* the value of the last_index changes if afr_next_call_child is
- * called. So to find the call_child of this callback use last_index
- * before the next_call_child call.
- */
- if (*last_index == -1)
- call_child = read_child;
- else
- call_child = fresh_children[*last_index];
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (op_ret == -1) {
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index,
- read_child);
- if (next_call_child < 0)
- goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- next_call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readdirp,
- local->fd,
- local->cont.readdir.size, 0);
- return 0;
- }
- }
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no entries found");
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
+out:
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
+ return 0;
+}
- afr_remember_entries (entries, local->fd);
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that'll make the application stop reading. So
- try to get more entries */
+int32_t
+afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries "
- "from offset %"PRId64", child %s",
- offset, children[call_child]->name);
+ if (op_ret == -1)
+ goto out;
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[call_child],
- children[call_child]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
- }
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries);
-
+ AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
return 0;
}
int32_t
afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
+ fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -659,16 +455,14 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -676,79 +470,67 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
read_child = afr_inode_get_read_ctx (this, fd->inode,
local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
goto out;
}
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, "
- "restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
-
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
- fd_ctx->last_tried = call_child;
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
+ goto out;
}
+ local->fd = fd_ref (fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
+
if (whichop == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdir, fd,
- size, offset);
+ size, offset, dict);
else
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdirp, fd,
- size, offset);
+ size, offset, dict);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
return 0;
}
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 6a6bc6354..09456d159 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,23 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 21287f8b8..1943b719b 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -47,33 +38,222 @@
#include "afr.h"
#include "afr-transaction.h"
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
+{
+ int ret = -1;
+ char *child_path = NULL;
+
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->path = gf_strdup( dirname (child_path) );
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
+
+ ret = 0;
+out:
+ GF_FREE(child_path);
+
+ return ret;
+}
void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
+__dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
- char *tmp = NULL;
+ afr_local_t *local = NULL;
- if (!child->parent) {
- //this should never be called with root as the child
- GF_ASSERT (0);
- loc_copy (parent, child);
- return;
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno))
+ afr_transaction_fop_failed (frame, this, child_index);
+
+ if (op_ret > -1) {
+ local->op_ret = op_ret;
+
+ if ((local->success_count == 0) ||
+ (child_index == local->read_child_index)) {
+ local->cont.dir_fop.preparent = *preparent;
+ local->cont.dir_fop.postparent = *postparent;
+ if (buf)
+ local->cont.dir_fop.buf = *buf;
+ if (prenewparent)
+ local->cont.dir_fop.prenewparent = *prenewparent;
+ if (postnewparent)
+ local->cont.dir_fop.postnewparent = *postnewparent;
+ }
+
+ local->cont.dir_fop.inode = inode;
+
+ local->fresh_children[local->success_count] = child_index;
+ local->success_count++;
+ local->child_errno[child_index] = 0;
+ } else {
+ local->child_errno[child_index] = op_errno;
}
- tmp = gf_strdup (child->path);
- parent->path = gf_strdup (dirname (tmp));
- GF_FREE (tmp);
+ local->op_errno = op_errno;
+}
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
+int
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = 0;
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
- parent->ino = parent->inode->ino;
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ AFR_STACK_DESTROY (frame);
+ }
+ return 0;
+}
+
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ GF_UNUSED int op_errno = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
+ new_local = new_frame->local;
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
+ gf_afr_mt_dict_t);
+ if (!xattr)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+ xattr[i] = dict_new ();
+ if (!xattr[i])
+ goto out;
+ }
+
+ afr_prepare_new_entry_pending_matrix (changelog,
+ afr_is_errno_set,
+ local->child_errno,
+ &local->cont.dir_fop.buf,
+ priv->child_count);
+
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);
+ new_local->call_count = local->success_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+
+ afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr[i], NULL);
+ }
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ afr_xattr_array_destroy (xattr, priv->child_count);
+ return;
+}
+
+gf_boolean_t
+afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
+{
+ glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL};
+ int i = 0;
+
+ for (i = 0; fops[i] != GF_FOP_NULL; i++) {
+ if (fop == fops[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->op_ret < 0)
+ goto out;
+
+ if (local->success_count == priv->child_count)
+ goto out;
+
+ if (!afr_is_new_entry_changelog_needed (local->op))
+ goto out;
+
+ afr_mark_new_entry_changelog (frame, this);
+
+out:
+ return;
+}
+
+void
+afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
- if (!uuid_is_null (child->pargfid))
- uuid_copy (parent->gfid, child->pargfid);
+ if (local->cont.dir_fop.inode == NULL)
+ goto done;
+ afr_set_read_ctx_from_policy (this, local->cont.dir_fop.inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child,
+ local->cont.dir_fop.buf.ia_gfid);
+done:
+ local->transaction.unwind (frame, this);
+ afr_dir_fop_mark_entry_pending_changelog (frame, this);
+ local->transaction.resume (frame, this);
}
/* {{{ create */
@@ -83,7 +263,6 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -97,18 +276,14 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
AFR_STACK_UNWIND (create, main_frame,
local->op_ret, local->op_errno,
local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ local->xdata_rsp);
}
return 0;
@@ -119,32 +294,24 @@ int
afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
+ if (op_ret > -1) {
ret = afr_fd_ctx_set (this, fd);
-
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not set ctx on fd=%p", fd);
@@ -155,7 +322,6 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
ret = fd_ctx_get (fd, this, &ctx);
-
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not get fd ctx for fd=%p", fd);
@@ -166,26 +332,17 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child_index] = 1;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
fd_ctx->flags = local->cont.create.flags;
- if (local->success_count == 0)
- local->cont.create.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
-
- local->cont.create.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
+ if (local->success_count == 0) {
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
}
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
unlock:
@@ -193,15 +350,8 @@ unlock:
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -218,7 +368,8 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -228,7 +379,7 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -236,8 +387,9 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
&local->loc,
local->cont.create.flags,
local->cont.create.mode,
+ local->umask,
local->cont.create.fd,
- local->cont.create.params);
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -265,14 +417,14 @@ afr_create_done (call_frame_t *frame, xlator_t *this)
int
afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -280,20 +432,20 @@ afr_create (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(create,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
@@ -304,33 +456,49 @@ afr_create (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_CREATE;
local->cont.create.flags = flags;
local->cont.create.mode = mode;
local->cont.create.fd = fd_ref (fd);
+ local->umask = umask;
if (params)
- local->cont.create.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.create.parent_ino = loc->parent->ino;
+ local->xdata_req = dict_ref (params);
local->transaction.fop = afr_create_wind;
local->transaction.done = afr_create_done;
local->transaction.unwind = afr_create_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -345,7 +513,6 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -359,17 +526,13 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
-
AFR_STACK_UNWIND (mknod, main_frame,
local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -380,58 +543,25 @@ int
afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mknod.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -448,7 +578,8 @@ afr_mknod_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -458,13 +589,14 @@ afr_mknod_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->mknod,
&local->loc, local->cont.mknod.mode,
local->cont.mknod.dev,
- local->cont.mknod.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -489,15 +621,15 @@ afr_mknod_done (call_frame_t *frame, xlator_t *this)
int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -505,20 +637,20 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(mknod,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
@@ -529,32 +661,48 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_MKNOD;
local->cont.mknod.mode = mode;
local->cont.mknod.dev = dev;
+ local->umask = umask;
if (params)
- local->cont.mknod.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.mknod.parent_ino = loc->parent->ino;
+ local->xdata_req = dict_ref (params);
local->transaction.fop = afr_mknod_wind;
local->transaction.done = afr_mknod_done;
local->transaction.unwind = afr_mknod_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -570,7 +718,6 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -584,17 +731,13 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
AFR_STACK_UNWIND (mkdir, main_frame,
local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -605,58 +748,25 @@ int
afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mkdir.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -673,7 +783,8 @@ afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -683,13 +794,14 @@ afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->mkdir,
&local->loc, local->cont.mkdir.mode,
- local->cont.mkdir.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -713,17 +825,16 @@ afr_mkdir_done (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -731,20 +842,20 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(mkdir,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
@@ -756,31 +867,47 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
UNLOCK (&priv->read_child_lock);
local->cont.mkdir.mode = mode;
+ local->umask = umask;
if (params)
- local->cont.mkdir.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.mkdir.parent_ino = loc->parent->ino;
+ local->xdata_req = dict_ref (params);
+ local->op = GF_FOP_MKDIR;
local->transaction.fop = afr_mkdir_wind;
local->transaction.done = afr_mkdir_done;
local->transaction.unwind = afr_mkdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -796,7 +923,6 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -810,17 +936,13 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
AFR_STACK_UNWIND (link, main_frame,
local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -831,59 +953,25 @@ int
afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -900,7 +988,8 @@ afr_link_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -910,12 +999,13 @@ afr_link_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->link,
&local->loc,
- &local->newloc);
+ &local->newloc, local->xdata_req);
if (!--call_count)
break;
@@ -941,14 +1031,14 @@ afr_link_done (call_frame_t *frame, xlator_t *this)
int
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -956,23 +1046,25 @@ afr_link (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(link,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
LOCK (&priv->read_child_lock);
{
@@ -981,30 +1073,41 @@ afr_link (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
- local->cont.link.ino = oldloc->inode->ino;
-
- if (oldloc->parent)
- local->cont.link.parent_ino = newloc->parent->ino;
-
+ local->op = GF_FOP_LINK;
local->transaction.fop = afr_link_wind;
local->transaction.done = afr_link_done;
local->transaction.unwind = afr_link_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- op_ret = 0;
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (link, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1020,7 +1123,6 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -1034,17 +1136,13 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
AFR_STACK_UNWIND (symlink, main_frame,
local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1055,58 +1153,25 @@ int
afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.symlink.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1123,7 +1188,8 @@ afr_symlink_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1133,14 +1199,15 @@ afr_symlink_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->symlink,
local->cont.symlink.linkpath,
&local->loc,
- local->cont.symlink.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
@@ -1167,14 +1234,14 @@ afr_symlink_done (call_frame_t *frame, xlator_t *this)
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1182,20 +1249,20 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(symlink,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
@@ -1207,30 +1274,46 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
UNLOCK (&priv->read_child_lock);
local->cont.symlink.linkpath = gf_strdup (linkpath);
+ local->umask = umask;
if (params)
- local->cont.symlink.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.symlink.parent_ino = loc->parent->ino;
+ local->xdata_req = dict_ref (params);
+ local->op = GF_FOP_SYMLINK;
local->transaction.fop = afr_symlink_wind;
local->transaction.done = afr_symlink_done;
local->transaction.unwind = afr_symlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (symlink, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1245,7 +1328,6 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -1259,19 +1341,14 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
AFR_STACK_UNWIND (rename, main_frame,
local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent);
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent,
+ NULL);
}
return 0;
@@ -1282,7 +1359,8 @@ int
afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
int call_count = -1;
@@ -1296,38 +1374,22 @@ afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
afr_transaction_fop_failed (frame, this, child_index);
+ local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent);
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1344,7 +1406,8 @@ afr_rename_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1354,13 +1417,13 @@ afr_rename_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->rename,
&local->loc,
- &local->newloc);
+ &local->newloc, NULL);
if (!--call_count)
break;
}
@@ -1385,14 +1448,15 @@ afr_rename_done (call_frame_t *frame, xlator_t *this)
int
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1400,54 +1464,90 @@ afr_rename (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(rename,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
- local->cont.rename.ino = oldloc->inode->ino;
-
- if (oldloc->parent)
- local->cont.rename.oldparent_ino = oldloc->parent->ino;
- if (newloc->parent)
- local->cont.rename.newparent_ino = newloc->parent->ino;
-
+ local->op = GF_FOP_RENAME;
local->transaction.fop = afr_rename_wind;
local->transaction.done = afr_rename_done;
local->transaction.unwind = afr_rename_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (oldloc->path);
local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
- op_ret = 0;
+ nlockee++;
+ }
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rename, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (rename, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1477,8 +1577,9 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (unlink, main_frame,
local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent);
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1488,51 +1589,28 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
int
afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int call_count = -1;
int child_index = (long) cookie;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
if (child_index == local->read_child_index) {
local->read_child_returned = _gf_true;
}
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1549,7 +1627,8 @@ afr_unlink_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1559,12 +1638,13 @@ afr_unlink_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->unlink,
- &local->loc);
+ &local->loc, local->xflag,
+ local->xdata_req);
if (!--call_count)
break;
@@ -1590,14 +1670,14 @@ afr_unlink_done (call_frame_t *frame, xlator_t *this)
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1605,44 +1685,62 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(unlink,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- if (loc->parent)
- local->cont.unlink.parent_ino = loc->parent->ino;
-
+ local->op = GF_FOP_UNLINK;
local->transaction.fop = afr_unlink_wind;
local->transaction.done = afr_unlink_done;
local->transaction.unwind = afr_unlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, op_ret, op_errno,
- NULL, NULL);
+ AFR_STACK_UNWIND (unlink, frame, -1, op_errno,
+ NULL, NULL, NULL);
}
return 0;
@@ -1674,8 +1772,9 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (rmdir, main_frame,
local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent);
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1685,52 +1784,36 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
int
afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int call_count = -1;
int child_index = (long) cookie;
int read_child = 0;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
-
if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL,
+ NULL);
+
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1747,7 +1830,8 @@ afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1757,12 +1841,13 @@ afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags);
+ &local->loc, local->cont.rmdir.flags,
+ NULL);
if (!--call_count)
break;
@@ -1788,14 +1873,15 @@ afr_rmdir_done (call_frame_t *frame, xlator_t *this)
int
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1803,45 +1889,71 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(rmdir,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
local->cont.rmdir.flags = flags;
loc_copy (&local->loc, loc);
- if (loc->parent)
- local->cont.rmdir.parent_ino = loc->parent->ino;
-
+ local->op = GF_FOP_RMDIR;
local->transaction.fop = afr_rmdir_wind;
local->transaction.done = afr_rmdir_done;
local->transaction.unwind = afr_rmdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
- op_ret = 0;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
- NULL, NULL);
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 0290c6350..02f0a3682 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
@@ -23,38 +14,34 @@
int32_t
afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params);
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params);
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
int32_t
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, dict_t *params);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+ const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 7d7cc3d80..e06e3b2f2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -44,9 +35,6 @@
#include "compat-errno.h"
#include "compat.h"
-#include "afr.h"
-
-
/**
* Common algorithm for inode read calls:
*
@@ -61,7 +49,7 @@
int32_t
afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -95,12 +83,13 @@ afr_access_cbk (call_frame_t *frame, void *cookie,
(void *) (long) read_child,
children[next_call_child],
children[next_call_child]->fops->access,
- &local->loc, local->cont.access.mask);
+ &local->loc, local->cont.access.mask,
+ NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
}
return 0;
@@ -108,15 +97,16 @@ out:
int32_t
-afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
xlator_t **children = NULL;
int call_child = 0;
afr_local_t *local = NULL;
- int32_t op_ret = -1;
int32_t op_errno = 0;
int32_t read_child = -1;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -127,16 +117,16 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -145,13 +135,12 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
read_child = afr_inode_get_read_ctx (this, loc->inode,
local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
+ ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
&local->cont.access.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ if (ret < 0) {
+ op_errno = -ret;
goto out;
}
@@ -162,13 +151,12 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->access,
- loc, mask);
+ loc, mask, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
return 0;
}
@@ -180,7 +168,7 @@ out:
int32_t
afr_stat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -214,12 +202,12 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,
(void *) (long) read_child,
children[next_call_child],
children[next_call_child]->fops->stat,
- &local->loc);
+ &local->loc, NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf);
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
}
return 0;
@@ -227,15 +215,15 @@ out:
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
xlator_t **children = NULL;
int call_child = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
int32_t read_child = -1;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -246,15 +234,16 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -262,29 +251,25 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
read_child = afr_inode_get_read_ctx (this, loc->inode,
local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
+ ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
&local->cont.stat.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ if (ret < 0) {
+ op_errno = -ret;
goto out;
}
loc_copy (&local->loc, loc);
- local->cont.stat.ino = loc->inode->ino;
-
STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
children[call_child],
children[call_child]->fops->stat,
- loc);
+ loc, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -296,7 +281,8 @@ out:
int32_t
afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -330,12 +316,12 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(void *) (long) read_child,
children[next_call_child],
children[next_call_child]->fops->fstat,
- local->fd);
+ local->fd, NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
}
return 0;
@@ -344,15 +330,15 @@ out:
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
xlator_t **children = NULL;
int call_child = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
int32_t read_child = 0;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -366,16 +352,16 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd->inode, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ AFR_SBRAIN_CHECK_FD (fd, out);
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -386,29 +372,28 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
- op_ret = afr_get_call_child (this, local->child_up, read_child,
+ ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
&local->cont.fstat.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ if (ret < 0) {
+ op_errno = -ret;
goto out;
}
- local->cont.fstat.ino = fd->inode->ino;
local->fd = fd_ref (fd);
+ afr_open_fd_fix (fd, this);
+
STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
children[call_child],
children[call_child]->fops->fstat,
- fd);
+ fd, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -420,7 +405,7 @@ out:
int32_t
afr_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf)
+ const char *buf, struct iatt *sbuf, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -454,12 +439,13 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,
children[next_call_child],
children[next_call_child]->fops->readlink,
&local->loc,
- local->cont.readlink.size);
+ local->cont.readlink.size, NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf);
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf,
+ xdata);
}
return 0;
@@ -468,15 +454,15 @@ out:
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
afr_private_t *priv = NULL;
xlator_t **children = NULL;
int call_child = 0;
afr_local_t *local = NULL;
- int32_t op_ret = -1;
int32_t op_errno = 0;
int32_t read_child = -1;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -487,47 +473,45 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
read_child = afr_inode_get_read_ctx (this, loc->inode,
local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
+ ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
&local->cont.readlink.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ if (ret < 0) {
+ op_errno = -ret;
goto out;
}
loc_copy (&local->loc, loc);
local->cont.readlink.size = size;
- local->cont.readlink.ino = loc->inode->ino;
STACK_WIND_COOKIE (frame, afr_readlink_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readlink,
- loc, size);
+ loc, size, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -542,7 +526,7 @@ struct _xattr_key {
};
-void
+int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -554,13 +538,14 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
@@ -590,7 +575,7 @@ __filter_xattrs (dict_t *dict)
int32_t
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -624,7 +609,8 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,
children[next_call_child],
children[next_call_child]->fops->getxattr,
&local->loc,
- local->cont.getxattr.name);
+ local->cont.getxattr.name,
+ NULL);
}
out:
@@ -632,39 +618,620 @@ out:
if (op_ret >= 0 && dict)
__filter_xattrs (dict);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
}
return 0;
}
int32_t
-afr_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata)
{
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int32_t
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ /*
+ * Per-child reply handler for the fgetxattr clear-locks request whose
+ * xattr name is kept in local->cont.getxattr.name. The cookie is the
+ * index of the replying child (it indexes children[] and
+ * local->child_errno[]). Each call decrements local->call_count under
+ * frame->lock; the call that brings it to zero builds the combined
+ * report and unwinds. Returns the last status stored in ret.
+ */
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ /* remember this child's failure for the final errno */
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ /* copy this child's report into local->dict, keyed by the child's name */
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ /* last reply: join the per-child reports with '\n' and unwind */
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ /* a serz_len of -1 is reported to the caller as "No locks cleared." */
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ /* the errno handed to the caller is derived from all per-child errnos */
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+ xdata);
+
+ /* drop the reference taken by dict_new () above */
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+/*
+ * Per-child reply handler for the getxattr clear-locks request whose xattr
+ * name is kept in local->cont.getxattr.name. The cookie is the index of the
+ * replying child (it indexes children[] and local->child_errno[]). Each call
+ * decrements local->call_count under frame->lock; the call that brings it to
+ * zero builds the combined report and unwinds. Returns the last status
+ * stored in ret.
+ */
+int32_t
+afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ /* remember this child's failure for the final errno */
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ /* copy this child's report into local->dict, keyed by the child's name */
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ /* last reply: join the per-child reports with '\n' and unwind */
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ /* a serz_len of -1 is reported to the caller as "No locks cleared." */
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ /* the errno handed to the caller is derived from all per-child errnos */
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ /* drop the reference taken by dict_new () above */
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+/**
+ * node-uuid cbk uses next child querying mechanism
+ *
+ * The cookie holds the index of the child that just replied. When that
+ * reply failed (op_ret == -1) the same getxattr is re-wound to the next
+ * child index; once the incremented index equals priv->child_count the
+ * failure is unwound to the caller. Any other op_ret is unwound as-is.
+ * Both the re-wind and the unwind pass NULL as xdata, so the xdata
+ * received here is not forwarded. Always returns 0.
+ */
+int32_t
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all children and yet no success; give up!
+ */
+ curr_call_child = (int) ((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
+
+ /* a new wind is in flight, so this reply must not unwind */
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
+ unwind:
+ if (unwind)
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ NULL);
+
+ return 0;
+}
+
+/**
+ * Per-child reply handler for a getxattr on GF_XATTR_LOCKINFO_KEY.
+ *
+ * Every reply decrements local->call_count under frame->lock. A successful
+ * reply that carries the lockinfo key has its serialized value unserialized
+ * and merged into local->dict; reply xdata is merged into local->xdata_rsp.
+ * The reply that brings the count to zero serializes local->dict into a new
+ * dict under the same key and unwinds getxattr with it.
+ *
+ * Failures while building the final reply are unwound as op_ret -1 with a
+ * matching op_errno.
+ *
+ * @return always 0
+ */
+int32_t
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+                           xlator_t *this, int32_t op_ret, int32_t op_errno,
+                           dict_t *dict, dict_t *xdata)
+{
+        int          call_cnt     = 0, len = 0;
+        char        *lockinfo_buf = NULL;
+        dict_t      *lockinfo     = NULL, *newdict = NULL;
+        afr_local_t *local        = NULL;
+
+        LOCK (&frame->lock);
+        {
+                local = frame->local;
+
+                call_cnt = --local->call_count;
+
+                if ((op_ret < 0) || (!dict && !xdata)) {
+                        goto unlock;
+                }
+
+                if (xdata) {
+                        if (!local->xdata_rsp) {
+                                local->xdata_rsp = dict_new ();
+                                if (!local->xdata_rsp) {
+                                        local->op_ret = -1;
+                                        local->op_errno = ENOMEM;
+                                        goto unlock;
+                                }
+                        }
+                }
+
+                if (!dict) {
+                        goto unlock;
+                }
+
+                /* lockinfo_buf stays NULL when the key is absent */
+                op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+                                               (void **)&lockinfo_buf, &len);
+
+                if (!lockinfo_buf) {
+                        goto unlock;
+                }
+
+                if (!local->dict) {
+                        local->dict = dict_new ();
+                        if (!local->dict) {
+                                local->op_ret = -1;
+                                local->op_errno = ENOMEM;
+                                goto unlock;
+                        }
+                }
+        }
+unlock:
+        UNLOCK (&frame->lock);
+
+        /*
+         * NOTE(review): the merges below run after frame->lock is dropped;
+         * confirm concurrent replies cannot race on local->dict.
+         */
+        if (lockinfo_buf != NULL) {
+                lockinfo = dict_new ();
+                if (lockinfo == NULL) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                } else {
+                        op_ret = dict_unserialize (lockinfo_buf, len,
+                                                   &lockinfo);
+
+                        if (lockinfo && local->dict) {
+                                dict_copy (lockinfo, local->dict);
+                        }
+                }
+        }
+
+        if (xdata && local->xdata_rsp) {
+                dict_copy (xdata, local->xdata_rsp);
+        }
+
+        if (!call_cnt) {
+                newdict = dict_new ();
+                if (!newdict) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        /* make the failure visible to the caller as well */
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                /*
+                 * A non-positive length (nothing gathered, or an error from
+                 * the helper) must never be used as an allocation size; this
+                 * matches the check in afr_fgetxattr_lockinfo_cbk.
+                 */
+                len = dict_serialized_length (local->dict);
+                if (len <= 0) {
+                        goto unwind;
+                }
+
+                lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+                if (!lockinfo_buf) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                op_ret = dict_serialize (local->dict, lockinfo_buf);
+                if (op_ret < 0) {
+                        local->op_ret = -1;
+                        local->op_errno = -op_ret;
+                        op_errno = -op_ret;
+                        op_ret = -1;
+                        /* do not publish a buffer that failed to serialize */
+                        GF_FREE (lockinfo_buf);
+                        goto unwind;
+                }
+
+                /*
+                 * NOTE(review): whether dict_set_dynptr releases the buffer
+                 * on failure is not visible here, so it is left untouched.
+                 */
+                op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+                                          (void *)lockinfo_buf, len);
+                if (op_ret < 0) {
+                        local->op_ret = -1;
+                        local->op_errno = -op_ret;
+                        op_errno = -op_ret;
+                        op_ret = -1;
+                        goto unwind;
+                }
+
+        unwind:
+                AFR_STACK_UNWIND (getxattr, frame, op_ret,
+                                  op_errno, newdict,
+                                  local->xdata_rsp);
+
+                /* drop our reference, as the sibling callbacks do */
+                if (newdict)
+                        dict_unref (newdict);
+        }
+
+        /* lockinfo is only set when this reply carried lockinfo data */
+        if (lockinfo)
+                dict_unref (lockinfo);
+
+        return 0;
+}
+
+/**
+ * Per-child reply handler for an fgetxattr on GF_XATTR_LOCKINFO_KEY.
+ *
+ * Every reply decrements local->call_count under frame->lock. A successful
+ * reply that carries the lockinfo key has its serialized value unserialized
+ * and merged into local->dict; reply xdata is merged into local->xdata_rsp.
+ * The reply that brings the count to zero serializes local->dict into a new
+ * dict under the same key and unwinds fgetxattr with it.
+ *
+ * Failures while building the final reply are unwound as op_ret -1 with a
+ * matching op_errno.
+ *
+ * @return always 0
+ */
+int32_t
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+                            xlator_t *this, int32_t op_ret, int32_t op_errno,
+                            dict_t *dict, dict_t *xdata)
+{
+        int          call_cnt     = 0, len = 0;
+        char        *lockinfo_buf = NULL;
+        dict_t      *lockinfo     = NULL, *newdict = NULL;
+        afr_local_t *local        = NULL;
+
+        LOCK (&frame->lock);
+        {
+                local = frame->local;
+
+                call_cnt = --local->call_count;
+
+                if ((op_ret < 0) || (!dict && !xdata)) {
+                        goto unlock;
+                }
+
+                if (xdata) {
+                        if (!local->xdata_rsp) {
+                                local->xdata_rsp = dict_new ();
+                                if (!local->xdata_rsp) {
+                                        local->op_ret = -1;
+                                        local->op_errno = ENOMEM;
+                                        goto unlock;
+                                }
+                        }
+                }
+
+                if (!dict) {
+                        goto unlock;
+                }
+
+                /* lockinfo_buf stays NULL when the key is absent */
+                op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+                                               (void **)&lockinfo_buf, &len);
+
+                if (!lockinfo_buf) {
+                        goto unlock;
+                }
+
+                if (!local->dict) {
+                        local->dict = dict_new ();
+                        if (!local->dict) {
+                                local->op_ret = -1;
+                                local->op_errno = ENOMEM;
+                                goto unlock;
+                        }
+                }
+        }
+unlock:
+        UNLOCK (&frame->lock);
+
+        /*
+         * NOTE(review): the merges below run after frame->lock is dropped;
+         * confirm concurrent replies cannot race on local->dict.
+         */
+        if (lockinfo_buf != NULL) {
+                lockinfo = dict_new ();
+                if (lockinfo == NULL) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                } else {
+                        op_ret = dict_unserialize (lockinfo_buf, len,
+                                                   &lockinfo);
+
+                        if (lockinfo && local->dict) {
+                                dict_copy (lockinfo, local->dict);
+                        }
+                }
+        }
+
+        if (xdata && local->xdata_rsp) {
+                dict_copy (xdata, local->xdata_rsp);
+        }
+
+        if (!call_cnt) {
+                newdict = dict_new ();
+                if (!newdict) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        /* make the failure visible to the caller as well */
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                /* nothing gathered, or the helper reported an error */
+                len = dict_serialized_length (local->dict);
+                if (len <= 0) {
+                        goto unwind;
+                }
+
+                lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+                if (!lockinfo_buf) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                op_ret = dict_serialize (local->dict, lockinfo_buf);
+                if (op_ret < 0) {
+                        local->op_ret = -1;
+                        local->op_errno = -op_ret;
+                        op_errno = -op_ret;
+                        op_ret = -1;
+                        /* do not publish a buffer that failed to serialize */
+                        GF_FREE (lockinfo_buf);
+                        goto unwind;
+                }
+
+                /*
+                 * NOTE(review): whether dict_set_dynptr releases the buffer
+                 * on failure is not visible here, so it is left untouched.
+                 */
+                op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+                                          (void *)lockinfo_buf, len);
+                if (op_ret < 0) {
+                        local->op_ret = -1;
+                        local->op_errno = -op_ret;
+                        op_errno = -op_ret;
+                        op_ret = -1;
+                        goto unwind;
+                }
+
+        unwind:
+                AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+                                  op_errno, newdict,
+                                  local->xdata_rsp);
+
+                /* drop our reference, as the sibling callbacks do */
+                if (newdict)
+                        dict_unref (newdict);
+        }
+
+        /* lockinfo is only set when this reply carried lockinfo data */
+        if (lockinfo)
+                dict_unref (lockinfo);
+
+        return 0;
+}
+
+/**
+ * Per-child reply handler for an fgetxattr whose xattr name is kept in
+ * local->cont.getxattr.name.
+ *
+ * Each successful reply has its string value duplicated into local->dict
+ * under the key "<name>-<child index>" and its length added to
+ * local->cont.getxattr.xattr_len. The reply that brings local->call_count
+ * to zero joins the gathered values, space separated, into
+ * "(<" AFR_PATHINFO_HEADER "<xlator name>> v1 v2 ...)" and unwinds
+ * fgetxattr with a dict that holds the result under the requested name.
+ *
+ * @return the last helper status stored in ret, or -1 when called with a
+ *         NULL frame, frame->local or this
+ */
+int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+                            xlator_t *this, int32_t op_ret, int32_t op_errno,
+                            dict_t *dict, dict_t *xdata)
+{
+        afr_local_t *local           = NULL;
+        int32_t      callcnt         = 0;
+        int          ret             = 0;
+        char        *xattr           = NULL;
+        char        *xattr_serz      = NULL;
+        char         xattr_cky[1024] = {0,};
+        dict_t      *nxattr          = NULL;
+        long         cky             = 0;
+        int32_t      padding         = 0;
+        int32_t      tlen            = 0;
+
+        if (!frame || !frame->local || !this) {
+                gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+                /*
+                 * frame->lock has not been taken and local is unusable, so
+                 * neither the unlock nor the unwind path may run.
+                 */
+                return -1;
+        }
+
+        local = frame->local;
+        cky = (long) cookie;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (!dict || (op_ret < 0))
+                        goto out;
+
+                if (!local->dict)
+                        local->dict = dict_new ();
+
+                if (local->dict) {
+                        ret = dict_get_str (dict,
+                                            local->cont.getxattr.name,
+                                            &xattr);
+                        if (ret)
+                                goto out;
+
+                        xattr = gf_strdup (xattr);
+                        if (!xattr) {
+                                /* nothing to store or measure for this child */
+                                ret = -ENOMEM;
+                                goto out;
+                        }
+
+                        (void) snprintf (xattr_cky, sizeof (xattr_cky),
+                                         "%s-%ld",
+                                         local->cont.getxattr.name, cky);
+                        ret = dict_set_dynstr (local->dict,
+                                               xattr_cky, xattr);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Cannot set xattr cookie key");
+                                goto out;
+                        }
+
+                        /* value plus the delimiter that will follow it */
+                        local->cont.getxattr.xattr_len
+                                += strlen (xattr) + 1;
+                }
+        }
+out:
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                /* no child contributed a value: unwind without a dict */
+                if (!local->cont.getxattr.xattr_len)
+                        goto unwind;
+
+                nxattr = dict_new ();
+                if (!nxattr) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                /* extra bytes for decorations (brackets and <>'s) */
+                padding += strlen (this->name)
+                        + strlen (AFR_PATHINFO_HEADER) + 4;
+                local->cont.getxattr.xattr_len += (padding + 2);
+
+                xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+                                        sizeof (char), gf_common_mt_char);
+
+                if (!xattr_serz) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                /* the xlator info */
+                (void) snprintf (xattr_serz, local->cont.getxattr.xattr_len,
+                                 "(<"AFR_PATHINFO_HEADER"%s> ", this->name);
+
+                /* actual series of pathinfo */
+                ret = dict_serialize_value_with_delim (local->dict,
+                                                       xattr_serz
+                                                       + strlen (xattr_serz),
+                                                       &tlen, ' ');
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+                                " dictionary");
+                        /* the buffer was never handed to a dict; release it */
+                        GF_FREE (xattr_serz);
+                        xattr_serz = NULL;
+                        goto unwind;
+                }
+
+                /* closing part */
+                *(xattr_serz + padding + tlen) = ')';
+                *(xattr_serz + padding + tlen + 1) = '\0';
+
+                ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+                                       xattr_serz);
+                if (ret)
+                        gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+                                " key in dict");
+
+        unwind:
+                AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr,
+                                  xdata);
+
+                if (nxattr)
+                        dict_unref (nxattr);
+        }
+
+        return ret;
+}
+
+int32_t
afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
- int ret = 0;
- char *pathinfo = NULL;
- char *pathinfo_serz = NULL;
- char pathinfo_cky[1024] = {0,};
- dict_t *xattr = NULL;
- long cky = 0;
- int32_t padding = 0;
- int32_t tlen = 0;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
if (!frame || !frame->local || !this) {
- gf_log (this->name, GF_LOG_ERROR, "possible NULL deref");
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
goto out;
}
@@ -682,90 +1249,222 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
local->dict = dict_new ();
if (local->dict) {
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
if (ret)
goto out;
- pathinfo = gf_strdup (pathinfo);
+ xattr = gf_strdup (xattr);
- snprintf (pathinfo_cky, 1024, "%s-%ld", GF_XATTR_PATHINFO_KEY, cky);
- ret = dict_set_dynstr (local->dict, pathinfo_cky, pathinfo);
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo cookie key");
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
goto out;
}
- local->cont.getxattr.pathinfo_len += strlen (pathinfo) + 1;
+ local->cont.getxattr.xattr_len += strlen (xattr) + 1;
}
}
out:
UNLOCK (&frame->lock);
if (!callcnt) {
- if (!local->cont.getxattr.pathinfo_len)
+ if (!local->cont.getxattr.xattr_len)
goto unwind;
- xattr = dict_new ();
- if (!xattr)
+ nxattr = dict_new ();
+ if (!nxattr)
goto unwind;
/* extra bytes for decorations (brackets and <>'s) */
- padding = strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
- local->cont.getxattr.pathinfo_len += (padding + 2);
+ padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
- pathinfo_serz = GF_CALLOC (local->cont.getxattr.pathinfo_len, sizeof (char),
- gf_common_mt_char);
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
- if (!pathinfo_serz)
+ if (!xattr_serz)
goto unwind;
/* the xlator info */
- sprintf (pathinfo_serz, "(<"AFR_PATHINFO_HEADER"%s> ", this->name);
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
/* actual series of pathinfo */
- ret = dict_serialize_value_with_delim (local->dict, pathinfo_serz + strlen (pathinfo_serz),
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz + strlen (xattr_serz),
&tlen, ' ');
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Error serializing dictionary");
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
goto unwind;
}
/* closing part */
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
- ret = dict_set_dynstr (xattr, GF_XATTR_PATHINFO_KEY, pathinfo_serz);
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo key in dict");
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
unwind:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
- if (local->dict)
- dict_unref (local->dict);
-
- if (xattr)
- dict_unref (xattr);
+ if (nxattr)
+ dict_unref (nxattr);
}
return ret;
}
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+ int ret = 0;
+
+ if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_min_stime (THIS, data, key, value);
+
+ return ret;
+}
+
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- xlator_t **sub_volumes = NULL;
- int i = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref (dict, NULL);
+ else
+ dict_foreach (dict, afr_aggregate_stime_xattr,
+ local->dict);
+ local->op_ret = 0;
+ }
+
+cleanup:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, local->dict, xdata);
+ }
+
+out:
+ return 0;
+}
+
+
+static gf_boolean_t
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
+{
+ gf_boolean_t is_spl = _gf_true;
+
+ GF_ASSERT (cbk);
+ if (!cbk) {
+ is_spl = _gf_false;
+ goto out;
+ }
+
+ if (!strcmp (name, GF_XATTR_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+ strlen (GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
+ } else {
+ is_spl = _gf_false;
+ }
+
+out:
+ return is_spl;
+}
+
+static void
+afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, loc_t *loc,
+ fop_getxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->getxattr,
+ loc, name, NULL);
+ }
+ return;
+}
+
+int32_t
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t **sub_volumes = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+ int afr_xtime_gauge[MCNT_MAX] = {0,};
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -776,48 +1475,108 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
+ if (!name)
+ goto no_name;
+ local->cont.getxattr.name = gf_strdup (name);
+
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no data present for key %s",
+ loc->path, name);
+ op_errno = ENODATA;
+ goto out;
+ }
+ if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_UUID_TYPE,
+ marker_uuid_default_gauge,
+ priv->vol_uuid)) {
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
gf_log (this->name, GF_LOG_INFO,
- "%s: no data present for key %s",
+ "%s: failed to get marker attr (%s)",
loc->path, name);
- op_errno = ENODATA;
+ op_errno = EINVAL;
goto out;
}
- if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ return 0;
+ }
+
+ /*
+ * if we are doing getxattr with pathinfo as the key then we
+ * collect information from all children
+ */
+ if (afr_is_special_xattr (name, &cbk, 0)) {
+ afr_getxattr_frm_all_children (this, frame, name,
+ loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID (name)) {
+ i = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+ if (*priv->vol_uuid) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ sub_volumes = alloca ( priv->child_count
+ * sizeof (xlator_t *));
for (i = 0, trav = this->children; trav ;
trav = trav->next, i++) {
*(sub_volumes + i) = trav->xlator;
+
}
- if (cluster_getmarkerattr (frame, this, loc, name,
- local, afr_getxattr_unwind,
+ /* don't err out on getting ENOTCONN (brick down)
+ * from a subset of the bricks
+ */
+ memcpy (afr_xtime_gauge, marker_xtime_default_gauge,
+ sizeof (afr_xtime_gauge));
+ afr_xtime_gauge[MCNT_NOTFOUND] = 0;
+ afr_xtime_gauge[MCNT_ENOTCONN] = 0;
+ if (cluster_getmarkerattr (frame, this, loc,
+ name, local,
+ afr_getxattr_unwind,
sub_volumes,
priv->child_count,
- MARKER_UUID_TYPE,
+ MARKER_XTIME_TYPE,
+ afr_xtime_gauge,
priv->vol_uuid)) {
-
gf_log (this->name, GF_LOG_INFO,
"%s: failed to get marker attr (%s)",
loc->path, name);
@@ -827,65 +1586,187 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
+ }
- /*
- * if we are doing getxattr with pathinfo as the key then we
- * collect information from all childs
- */
- if (strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0) {
-
- local->call_count = priv->child_count;
- for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, afr_getxattr_pathinfo_cbk,
- (void *) (long) i,
- children[i], children[i]->fops->getxattr,
- loc, name);
- }
+no_name:
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- return 0;
- }
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- if (*priv->vol_uuid) {
- if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->getxattr,
+ loc, name, xdata);
- local->marker.call_count = priv->child_count;
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
+/* {{{ fgetxattr */
- *(sub_volumes + i) = trav->xlator;
- }
+int32_t
+afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- if (cluster_getmarkerattr (frame, this, loc,
- name, local,
- afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_XTIME_TYPE,
- priv->vol_uuid)) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to get marker attr (%s)",
- loc->path, name);
- op_errno = EINVAL;
- goto out;
- }
+ priv = this->private;
+ children = priv->children;
- return 0;
- }
- }
+ local = frame->local;
+
+ read_child = (long) cookie;
+
+ if (op_ret == -1) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->fgetxattr,
+ local->fd,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret >= 0 && dict)
+ __filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+static void
+afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, fd_t *fd,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->fgetxattr,
+ fd, name, NULL);
+ }
+
+ return;
+}
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ frame->local = local;
+
+ op_ret = afr_local_init (local, priv, &op_errno);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fd = fd_ref (fd);
+ if (name)
+ local->cont.getxattr.name = gf_strdup (name);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_frm_all_children (this, frame, name,
+ fd, cbk);
+ return 0;
+ }
+
+
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
op_ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
@@ -896,16 +1777,17 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
(void *) (long) call_child,
children[call_child],
- children[call_child]->fops->getxattr,
- loc, name);
+ children[call_child]->fops->fgetxattr,
+ fd, name, xdata);
op_ret = 0;
out:
if (op_ret == -1) {
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL,
+ NULL);
}
return 0;
}
@@ -931,7 +1813,7 @@ int32_t
afr_readv_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -972,13 +1854,15 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
children[next_call_child],
children[next_call_child]->fops->readv,
local->fd, local->cont.readv.size,
- local->cont.readv.offset);
+ local->cont.readv.offset,
+ local->cont.readv.flags,
+ NULL);
}
out:
if (unwind) {
AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
+ vector, count, buf, iobref, xdata);
}
return 0;
@@ -987,15 +1871,15 @@ out:
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
xlator_t ** children = NULL;
int call_child = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
int32_t read_child = -1;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1005,48 +1889,50 @@ afr_readv (call_frame_t *frame, xlator_t *this,
priv = this->private;
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
+ ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
&local->cont.readv.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ if (ret < 0) {
+ op_errno = -ret;
goto out;
}
local->fd = fd_ref (fd);
- local->cont.readv.ino = fd->inode->ino;
local->cont.readv.size = size;
local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+
+ afr_open_fd_fix (fd, this);
STACK_WIND_COOKIE (frame, afr_readv_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, 0, NULL,
- NULL);
+ if (ret < 0) {
+ AFR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL,
+ NULL, NULL);
}
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 5479cfbd5..e4091a793 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_READ_H__
@@ -22,26 +13,30 @@
int32_t
afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask);
+ loc_t *loc, int32_t mask, dict_t *xdata);
int32_t
afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size);
+ loc_t *loc, size_t size, dict_t *xdata);
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index c292b7493..c1ec69a55 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -46,46 +37,153 @@
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal-common.h"
+
+void
+__inode_write_fop_cbk (call_frame_t *frame, int child_index, int read_child,
+ xlator_t *this, int32_t *op_ret, int32_t *op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (*op_ret, *op_errno)) {
+ local->child_errno[child_index] = *op_errno;
+
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ if (*op_errno != EFBIG)
+ afr_transaction_fop_failed (frame, this,
+ child_index);
+ break;
+ default:
+ afr_transaction_fop_failed (frame, this, child_index);
+ break;
+ }
+ local->op_errno = *op_errno;
+ goto out;
+ }
+
+ if ((local->success_count == 0) || (read_child == child_index)) {
+ local->op_ret = *op_ret;
+ if (prebuf)
+ local->cont.inode_wfop.prebuf = *prebuf;
+ if (postbuf)
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+out:
+ return;
+}
/* {{{ writev */
-int
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
+{
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+}
+
+void
afr_writev_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ local = frame->local;
+
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+}
+
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
local = frame->local;
LOCK (&frame->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
+ fop_frame = local->transaction.main_frame;
local->transaction.main_frame = NULL;
}
UNLOCK (&frame->lock);
- if (main_frame) {
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf);
+ return fop_frame;
+}
+
+int
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *fop_frame = NULL;
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame);
+ afr_writev_unwind (fop_frame, this);
}
return 0;
}
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) ||
+ (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
+ }
+}
int
afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *fop_frame = NULL;
int child_index = (long) cookie;
int call_count = -1;
int read_child = 0;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
local = frame->local;
+ priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
@@ -95,37 +193,85 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+
+ /* stage the best case return value for unwind */
+ if ((local->success_count == 0) || (op_ret > local->op_ret)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret != -1) {
+ if (xdata) {
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if ((ret == 0) &&
+ (open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
}
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
- }
-
- local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ afr_writev_handle_short_writes (frame, this);
+ if (afr_any_fops_failed (local, priv)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
+ }
}
return 0;
}
-
int
afr_writev_wind (call_frame_t *frame, xlator_t *this)
{
@@ -133,11 +279,14 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int i = 0;
int call_count = -1;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -145,9 +294,31 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ local->transaction.unwind(frame, this);
+ local->transaction.resume(frame, this);
+ return 0;
+ }
+
+ xdata = dict_new ();
+ if (xdata) {
+ ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ sizeof (uint32_t));
+ ret = dict_set_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ 0);
+ /* Set append_write to be true speculatively. If on any
+ server it turns out not to be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+ }
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -156,13 +327,18 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
local->cont.writev.vector,
local->cont.writev.count,
local->cont.writev.offset,
- local->cont.writev.iobref);
+ local->cont.writev.flags,
+ local->cont.writev.iobref,
+ xdata);
if (!--call_count)
break;
}
}
+ if (xdata)
+ dict_unref (xdata);
+
return 0;
}
@@ -188,10 +364,10 @@ afr_writev_done (call_frame_t *frame, xlator_t *this)
int
afr_do_writev (call_frame_t *frame, xlator_t *this)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
local = frame->local;
@@ -202,7 +378,7 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
}
transaction_frame->local = local;
- frame->local = NULL;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local->op = GF_FOP_WRITE;
@@ -210,10 +386,17 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->transaction.fop = afr_writev_wind;
local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
+ local->transaction.unwind = afr_transaction_writev_unwind;
local->transaction.main_frame = frame;
if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency guarantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
local->transaction.start = 0;
local->transaction.len = 0;
} else {
@@ -222,32 +405,137 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->cont.writev.count);
}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
}
return 0;
}
+/*
+ * Launch a self-heal for the inode behind an open fd.
+ *
+ * A new frame with its own afr_local_t is created, metadata self-heal is
+ * requested unconditionally, and data (regular file) or entry (directory)
+ * self-heal is requested according to the inode type. The frame is then
+ * handed to afr_launch_self_heal() and this function returns without
+ * destroying it.
+ *
+ * On any setup failure nothing is launched and the frame, if one was
+ * created, is destroyed.
+ */
+static void
+afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this)
+{
+        call_frame_t    *frame    = NULL;
+        afr_local_t     *local    = NULL;
+        afr_self_heal_t *sh       = NULL;
+        char            *reason   = NULL;
+        int32_t          op_errno = 0;
+        int              ret      = 0;
+
+        if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: "
+                                  "fd: %p, inode: %p", fd,
+                                  fd ? fd->inode : NULL);
+                goto out;
+        }
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                goto out;
+
+        AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+        local = frame->local;
+        ret = afr_local_init (local, this->private, &op_errno);
+        if (ret < 0)
+                goto out;
+
+        local->loc.inode = inode_ref (fd->inode);
+        ret = loc_path (&local->loc, NULL);
+        if (ret < 0)
+                goto out;
+
+        sh = &local->self_heal;
+        sh->do_metadata_self_heal = _gf_true;
+        if (fd->inode->ia_type == IA_IFREG)
+                sh->do_data_self_heal = _gf_true;
+        else if (fd->inode->ia_type == IA_IFDIR)
+                sh->do_entry_self_heal = _gf_true;
+
+        reason = "subvolume came online";
+        afr_launch_self_heal (frame, this, fd->inode, _gf_true,
+                              fd->inode->ia_type, reason, NULL, NULL);
+        return;
+out:
+        /* frame is still NULL when argument validation or create_frame()
+         * failed, so guard the destroy the same way the fop entry points
+         * guard their transaction_frame. */
+        if (frame)
+                AFR_STACK_DESTROY (frame);
+}
+
+/*
+ * Bring an open fd up to date with the children that are currently up.
+ *
+ * Does nothing when afr_is_fd_fixable() rejects the fd or when no fd
+ * context can be obtained. Otherwise, under fd->lock:
+ *   - if the fd context's up_count is behind priv->up_count, a self-heal
+ *     is flagged and the context's up/down counts are synced to priv's;
+ *   - every child that is up and still AFR_FD_NOT_OPENED is marked
+ *     AFR_FD_OPENING and recorded in need_open[].
+ *
+ * After the lock is dropped the self-heal (if flagged) is triggered and,
+ * when at least one child needs an open, afr_fix_open() is called with the
+ * per-child need_open[] array.
+ */
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this)
+{
+        int            i               = 0;
+        afr_fd_ctx_t  *fd_ctx          = NULL;
+        gf_boolean_t   need_self_heal  = _gf_false;
+        int           *need_open       = NULL;
+        size_t         need_open_count = 0;
+        afr_private_t *priv            = NULL;
+
+        priv = this->private;
+
+        if (!afr_is_fd_fixable (fd))
+                goto out;
+
+        fd_ctx = afr_fd_ctx_get (fd, this);
+        if (!fd_ctx)
+                goto out;
+
+        LOCK (&fd->lock);
+        {
+                if (fd_ctx->up_count < priv->up_count) {
+                        need_self_heal = _gf_true;
+                        fd_ctx->up_count   = priv->up_count;
+                        fd_ctx->down_count = priv->down_count;
+                }
+
+                /* Stack-allocated; one slot per child, valid until return. */
+                need_open = alloca (priv->child_count * sizeof (*need_open));
+                for (i = 0; i < priv->child_count; i++) {
+                        need_open[i] = 0;
+                        if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED)
+                                continue;
+
+                        if (!priv->child_up[i])
+                                continue;
+
+                        fd_ctx->opened_on[i] = AFR_FD_OPENING;
+
+                        need_open[i] = 1;
+                        need_open_count++;
+                }
+        }
+        UNLOCK (&fd->lock);
+
+        if (need_self_heal)
+                afr_trigger_open_fd_self_heal (fd, this);
+
+        if (!need_open_count)
+                goto out;
+
+        afr_fix_open (this, fd, need_open_count, need_open);
+out:
+        return;
+}
int
afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
int ret = -1;
- int op_ret = -1;
int op_errno = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -255,43 +543,41 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
goto out;
}
- frame->local = local;
+ QUORUM_CHECK(writev,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.writev.vector = iov_dup (vector, count);
local->cont.writev.count = count;
local->cont.writev.offset = offset;
- local->cont.writev.ino = fd->inode->ino;
+ local->cont.writev.flags = flags;
local->cont.writev.iobref = iobref_ref (iobref);
local->fd = fd_ref (fd);
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- goto out;
- }
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ afr_open_fd_fix (fd, this);
- if (fd_ctx->up_count < priv->up_count) {
- local->openfd_flush_cbk = afr_do_writev;
- afr_openfd_flush (frame, this, fd);
- } else {
- afr_do_writev (frame, this);
- }
+ afr_do_writev (frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -320,8 +606,9 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -331,17 +618,14 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
int
afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int child_index = (long) cookie;
int read_child = 0;
int call_count = -1;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
@@ -351,38 +635,22 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
}
- local->op_errno = op_errno;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
local->transaction.resume (frame, this);
}
@@ -401,7 +669,8 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -409,15 +678,17 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->stable_write = _gf_true;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->truncate,
&local->loc,
- local->cont.truncate.offset);
+ local->cont.truncate.offset,
+ NULL);
if (!--call_count)
break;
@@ -445,13 +716,12 @@ afr_truncate_done (call_frame_t *frame, xlator_t *this)
int
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -460,25 +730,22 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(truncate,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
-
- local->op_ret = -1;
local->cont.truncate.offset = offset;
- local->cont.truncate.ino = loc->inode->ino;
local->transaction.fop = afr_truncate_wind;
local->transaction.done = afr_truncate_done;
@@ -490,14 +757,18 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
local->transaction.start = offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -528,8 +799,9 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
}
@@ -538,17 +810,14 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
int
afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int child_index = (long) cookie;
int call_count = -1;
- int need_unwind = 0;
int read_child = 0;
local = frame->local;
- priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
@@ -558,38 +827,22 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
}
- local->op_errno = op_errno;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
local->transaction.resume (frame, this);
}
@@ -608,7 +861,8 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -616,14 +870,17 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->stable_write = _gf_true;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset);
+ local->fd,
+ local->cont.ftruncate.offset,
+ NULL);
if (!--call_count)
break;
@@ -678,14 +935,19 @@ afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
local->transaction.start = local->cont.ftruncate.offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
}
return 0;
@@ -694,16 +956,13 @@ out:
int
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
int op_errno = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -711,41 +970,33 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
-
- if (ret < 0) {
- op_errno = -ret;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
goto out;
}
+ QUORUM_CHECK(ftruncate,out);
- frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.ftruncate.offset = offset;
- local->cont.ftruncate.ino = fd->inode->ino;
local->fd = fd_ref (fd);
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- goto out;
- }
+ afr_open_fd_fix (fd, this);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ afr_do_ftruncate (frame, this);
- if (fd_ctx->up_count < priv->up_count) {
- local->openfd_flush_cbk = afr_do_ftruncate;
- afr_openfd_flush (frame, this, fd);
- } else {
- afr_do_ftruncate (frame, this);
- }
-
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -774,8 +1025,9 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -785,7 +1037,7 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
int
afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -805,29 +1057,14 @@ afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
-
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
-
- local->success_count++;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
}
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -855,7 +1092,8 @@ afr_setattr_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -865,14 +1103,15 @@ afr_setattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setattr,
&local->loc,
&local->cont.setattr.in_buf,
- local->cont.setattr.valid);
+ local->cont.setattr.valid,
+ NULL);
if (!--call_count)
break;
@@ -900,13 +1139,12 @@ afr_setattr_done (call_frame_t *frame, xlator_t *this)
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid)
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -915,24 +1153,20 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(setattr,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
-
- local->op_ret = -1;
-
- local->cont.setattr.ino = loc->inode->ino;
local->cont.setattr.in_buf = *buf;
local->cont.setattr.valid = valid;
@@ -947,14 +1181,18 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -981,8 +1219,9 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -992,7 +1231,7 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
int
afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -1012,29 +1251,14 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
-
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
}
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1062,7 +1286,8 @@ afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1072,14 +1297,15 @@ afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fsetattr,
local->fd,
&local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid);
+ local->cont.fsetattr.valid,
+ NULL);
if (!--call_count)
break;
@@ -1104,16 +1330,14 @@ afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid)
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1122,24 +1346,25 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(fsetattr,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- transaction_frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- local->op_ret = -1;
-
- local->cont.fsetattr.ino = fd->inode->ino;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.fsetattr.in_buf = *buf;
local->cont.fsetattr.valid = valid;
@@ -1150,18 +1375,24 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
local->fd = fd_ref (fd);
+ afr_open_fd_fix (fd, this);
+
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -1189,38 +1420,34 @@ afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ local->op_ret, local->op_errno,
+ NULL);
+ }
return 0;
}
int
afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1240,15 +1467,16 @@ afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1258,14 +1486,15 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setxattr,
&local->loc,
local->cont.setxattr.dict,
- local->cont.setxattr.flags);
+ local->cont.setxattr.flags,
+ NULL);
if (!--call_count)
break;
@@ -1279,7 +1508,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
int
afr_setxattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = frame->local;
local->transaction.unwind (frame, this);
@@ -1290,37 +1519,40 @@ afr_setxattr_done (call_frame_t *frame, xlator_t *this)
int
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ int ret = -1;
+ int op_errno = EINVAL;
- VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+
+ QUORUM_CHECK(setxattr,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- transaction_frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- local->op_ret = -1;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.setxattr.dict = dict_ref (dict);
local->cont.setxattr.flags = flags;
@@ -1335,14 +1567,211 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* {{{ fsetxattr */
+
+
+/*
+ * Unwind the fsetxattr fop to the originating (main) frame.
+ *
+ * The main frame pointer is taken and cleared under frame->lock, so a
+ * later call finds it NULL and does not unwind again. The unwind carries
+ * local->op_ret / local->op_errno and a NULL xdata.
+ */
+int
+afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (fsetxattr, main_frame,
+                          local->op_ret, local->op_errno,
+                          NULL);
+        return 0;
+}
+
+
+/*
+ * Per-child callback for the fsetxattr wind.
+ *
+ * The reply is folded into the shared local via __inode_write_fop_cbk()
+ * under frame->lock (no read child, no iatt buffers). When the success
+ * count equals the total child count the transaction's unwind hook is
+ * invoked; once the last outstanding reply arrives the transaction is
+ * resumed.
+ */
+int
+afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            need_unwind = 0;
+
+        LOCK (&frame->lock);
+        {
+                __inode_write_fop_cbk (frame, child_index, -1, this,
+                                       &op_ret, &op_errno, NULL, NULL,
+                                       xdata);
+                need_unwind = (local->success_count == priv->child_count);
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        if (afr_frame_return (frame) == 0)
+                local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Wind fsetxattr to every child on which the pre-op was done.
+ *
+ * If no child qualifies the transaction is resumed immediately.
+ * Otherwise local->call_count is set to the number of qualifying
+ * children and one fsetxattr is wound per child, with the child index
+ * as the cookie and a NULL xdata.
+ */
+int
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            i       = 0;
+
+        pending = afr_pre_op_done_children_count (local->transaction.pre_op,
+                                                  priv->child_count);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->transaction.pre_op[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->fsetxattr,
+                                   local->fd,
+                                   local->cont.fsetxattr.dict,
+                                   local->cont.fsetxattr.flags,
+                                   NULL);
+
+                /* Stop scanning once every counted child has been wound. */
+                if (!--pending)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" hook for fsetxattr: invoke the unwind hook, then
+ * destroy the transaction frame.
+ */
+int
+afr_fsetxattr_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+/*
+ * fsetxattr fop entry point: set extended attributes through an open fd
+ * as an AFR metadata transaction.
+ *
+ * frame  - caller's frame; unwound with -1/op_errno on any setup failure
+ * fd     - open fd whose inode receives the xattrs
+ * dict   - xattrs to set; checked by GF_IF_INTERNAL_XATTR_GOTO against the
+ *          "trusted.afr.*" and "trusted.glusterfs.afr.*" patterns
+ *          (presumably to refuse AFR's internal xattrs - confirm against
+ *          the macro definition)
+ * flags  - passed through to the children's fsetxattr
+ * xdata  - not used by this function
+ *
+ * Fails with EIO when the inode is in split-brain. The transaction frame
+ * is created before the local is allocated on it, so that every failure
+ * path reaches "out" with ret < 0, the caller is always unwound, and the
+ * local is released together with the transaction frame.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+        int            ret               = -1;
+        int            op_errno          = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+                                   op_errno, out);
+
+        GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+                                   op_errno, out);
+
+        priv = this->private;
+
+        if (afr_is_split_brain (this, fd->inode)) {
+                op_errno = EIO;
+                goto out;
+        }
+
+        QUORUM_CHECK(fsetxattr,out);
+
+        /* Create the transaction frame while ret is still -1, so a failure
+         * here is reported to the caller instead of being silently dropped. */
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        /* The local lives on the transaction frame from the start, so
+         * destroying that frame on the error path also releases it. */
+        AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+        local = transaction_frame->local;
+
+        ret = afr_local_init (local, priv, &op_errno);
+        if (ret < 0)
+                goto out;
+
+        local->op_ret = -1;
+
+        local->cont.fsetxattr.dict  = dict_ref (dict);
+        local->cont.fsetxattr.flags = flags;
+
+        local->transaction.fop    = afr_fsetxattr_wind;
+        local->transaction.done   = afr_fsetxattr_done;
+        local->transaction.unwind = afr_fsetxattr_unwind;
+
+        local->fd = fd_ref (fd);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = LLONG_MAX - 1;
+        local->transaction.len        = 0;
+
+        ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        ret = 0;
 out:
-        if (op_ret == -1) {
+        if (ret < 0) {
                 if (transaction_frame)
                         AFR_STACK_DESTROY (transaction_frame);
-                AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+                AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
         }
         return 0;
@@ -1350,6 +1779,7 @@ out:
/* }}} */
+
/* {{{ removexattr */
@@ -1371,38 +1801,34 @@ afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ local->op_ret, local->op_errno,
+ NULL);
+ }
return 0;
}
int
afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1430,7 +1856,8 @@ afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1440,13 +1867,14 @@ afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->removexattr,
&local->loc,
- local->cont.removexattr.name);
+ local->cont.removexattr.name,
+ NULL);
if (!--call_count)
break;
@@ -1472,7 +1900,192 @@ afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+/*
+ * removexattr entry point.
+ *
+ * Runs the GF_IF_NATIVE_XATTR_GOTO checks for the two AFR-internal name
+ * patterns, validates the arguments, performs the quorum check, then builds
+ * a transaction frame whose local carries a private copy of the attribute
+ * name and of loc, and starts an AFR_METADATA_TRANSACTION.
+ *
+ * On any failure before the transaction is running, the transaction frame
+ * (if one was created) is destroyed and the caller's frame is unwound with
+ * -1 and op_errno.  Always returns 0.
+ */
int
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+                 loc_t *loc, const char *name, dict_t *xdata)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+        int            ret               = -1;
+        /* VALIDATE_OR_GOTO is not handed op_errno, so this initial value is
+         * what reaches the caller on a validation failure; use EINVAL, as
+         * afr_fsetxattr does, rather than 0. */
+        int            op_errno          = EINVAL;
+
+        VALIDATE_OR_GOTO (this, out);
+
+        GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+                                 name, op_errno, out);
+
+        GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+                                 name, op_errno, out);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        priv = this->private;
+
+        QUORUM_CHECK(removexattr,out);
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+        local = transaction_frame->local;
+
+        ret = afr_local_init (local, priv, &op_errno);
+        if (ret < 0)
+                goto out;
+
+        local->cont.removexattr.name = gf_strdup (name);
+        if (!local->cont.removexattr.name) {
+                /* do not wind a NULL name to the children; ret is
+                 * non-negative here, so force the failure path */
+                ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        local->transaction.fop    = afr_removexattr_wind;
+        local->transaction.done   = afr_removexattr_done;
+        local->transaction.unwind = afr_removexattr_unwind;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = LLONG_MAX - 1;
+        local->transaction.len        = 0;
+
+        ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+        if (ret < 0) {
+                /* afr_transaction reports failure as a negated errno */
+                op_errno = -ret;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret < 0) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* fremovexattr */
+/*
+ * Unwind callback for the fremovexattr transaction.
+ *
+ * Detaches transaction.main_frame from the local while holding frame->lock
+ * and, if a main frame was still attached, unwinds it with the op_ret and
+ * op_errno stored in the local (xdata is NULL).  Since the pointer is
+ * cleared under the lock, a later call finds nothing to unwind.
+ * Always returns 0.
+ */
+int
+afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (fremovexattr, main_frame,
+                          local->op_ret, local->op_errno,
+                          NULL);
+        return 0;
+}
+
+
+/*
+ * Reply handler for each fremovexattr wind issued by
+ * afr_fremovexattr_wind(); the cookie carries the child index.
+ *
+ * Under frame->lock the reply is handed to __inode_write_fop_cbk() (passing
+ * -1 and NULL iatt buffers), and an early unwind is requested when
+ * success_count equals priv->wait_count.  After the lock is released the
+ * unwind callback runs if requested, and the transaction is resumed once
+ * afr_frame_return() reports 0.  Always returns 0.
+ */
+int
+afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            need_unwind = 0;
+
+        LOCK (&frame->lock);
+        {
+                __inode_write_fop_cbk (frame, child_index, -1, this,
+                                       &op_ret, &op_errno, NULL, NULL,
+                                       xdata);
+
+                need_unwind = (local->success_count == priv->wait_count);
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        if (afr_frame_return (frame) == 0)
+                local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * transaction.fop hook for fremovexattr.
+ *
+ * Winds fremovexattr, with local->fd and the saved attribute name, to every
+ * child whose transaction.pre_op[] entry is set.  If
+ * afr_pre_op_done_children_count() reports no such child, the transaction is
+ * resumed straight away.  Otherwise local->call_count is set to that count
+ * and the loop ends as soon as that many winds have been issued.
+ * Always returns 0.
+ */
+int32_t
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            i       = 0;
+
+        pending = afr_pre_op_done_children_count (local->transaction.pre_op,
+                                                  priv->child_count);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->transaction.pre_op[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->fremovexattr,
+                                   local->fd,
+                                   local->cont.removexattr.name,
+                                   NULL);
+
+                /* stop immediately after the last expected wind */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Completion hook installed as transaction.done for fremovexattr.
+ * Calls the transaction's unwind callback for this frame and then destroys
+ * the frame.  Always returns 0.
+ */
+int
+afr_fremovexattr_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+int
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -1481,21 +2094,33 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
int op_ret = -1;
int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
+
+ VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
+ QUORUM_CHECK(fremovexattr, out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
- ret = AFR_LOCAL_INIT (local, priv);
+ ret = afr_local_init (local, priv, &op_errno);
if (ret < 0) {
op_errno = -ret;
goto out;
@@ -1507,25 +2132,730 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
local->cont.removexattr.name = gf_strdup (name);
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ local->transaction.fop = afr_fremovexattr_wind;
+ local->transaction.done = afr_fremovexattr_done;
+ local->transaction.unwind = afr_fremovexattr_unwind;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/*
+ * Unwind callback for the fallocate transaction.
+ *
+ * Detaches transaction.main_frame from the local while holding frame->lock
+ * and, if a main frame was still attached, unwinds it with the stored
+ * op_ret/op_errno and the pre/post iatt kept in cont.inode_wfop (xdata is
+ * NULL).  A later call finds the pointer already cleared and does nothing.
+ * Always returns 0.
+ */
+static int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret,
+                          local->op_errno,
+                          &local->cont.inode_wfop.prebuf,
+                          &local->cont.inode_wfop.postbuf,
+                          NULL);
+        return 0;
+}
+
+/*
+ * Reply handler for each fallocate wind; the cookie carries the child index.
+ *
+ * The read child is looked up from the fd's inode before the lock is taken.
+ * Under frame->lock the handler notes whether the read child has answered,
+ * hands the reply to __inode_write_fop_cbk(), and requests an early unwind
+ * once success_count has reached priv->wait_count and the read child has
+ * returned.  After the lock is released the unwind callback runs if
+ * requested, and the transaction is resumed once afr_frame_return()
+ * reports 0.  Always returns 0.
+ */
+static int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                        struct iatt *postbuf, dict_t *xdata)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            need_unwind = 0;
+        int            read_child  = 0;
+
+        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+        LOCK (&frame->lock);
+        {
+                if (child_index == read_child)
+                        local->read_child_returned = _gf_true;
+
+                __inode_write_fop_cbk (frame, child_index, read_child, this,
+                                       &op_ret, &op_errno, prebuf, postbuf,
+                                       xdata);
+
+                need_unwind = (local->success_count >= priv->wait_count)
+                              && local->read_child_returned;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        if (afr_frame_return (frame) == 0)
+                local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+/*
+ * transaction.fop hook for fallocate.
+ *
+ * Winds fallocate, with the fd, mode, offset and len saved in the local, to
+ * every child whose transaction.pre_op[] entry is set.  If
+ * afr_pre_op_done_children_count() reports no such child, the transaction is
+ * resumed straight away.  Otherwise local->call_count is set to that count
+ * and the loop ends as soon as that many winds have been issued.
+ * Always returns 0.
+ */
+static int
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            i       = 0;
+
+        pending = afr_pre_op_done_children_count (local->transaction.pre_op,
+                                                  priv->child_count);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->transaction.pre_op[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->fallocate,
+                                   local->fd,
+                                   local->cont.fallocate.mode,
+                                   local->cont.fallocate.offset,
+                                   local->cont.fallocate.len,
+                                   NULL);
+
+                /* stop immediately after the last expected wind */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+/*
+ * Completion hook installed as transaction.done by afr_do_fallocate().
+ * Calls the transaction's unwind callback for this frame and then destroys
+ * the frame.  Always returns 0.
+ */
+static int
+afr_fallocate_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+/*
+ * Starts the fallocate transaction for a frame prepared by afr_fallocate().
+ *
+ * Copies the frame, moves the local from the caller's frame to the copy,
+ * installs the fallocate wind/done/unwind hooks and launches an
+ * AFR_DATA_TRANSACTION starting at the requested offset.
+ *
+ * On failure the transaction frame (if any) is destroyed and the caller's
+ * frame is unwound here with a negative op_ret and op_errno, so callers must
+ * not unwind it again.  Always returns 0.
+ */
+static int
+afr_do_fallocate (call_frame_t *frame, xlator_t *this)
+{
+        call_frame_t *transaction_frame = NULL;
+        afr_local_t  *local             = NULL;
+        int           op_ret            = -1;
+        int           op_errno          = 0;
+
+        local = frame->local;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                /* report the failure with a real errno instead of 0,
+                 * matching afr_removexattr's handling of copy_frame */
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        /* the local now belongs to the transaction frame */
+        transaction_frame->local = local;
+        frame->local = NULL;
+
+        local->op = GF_FOP_FALLOCATE;
+
+        local->transaction.fop    = afr_fallocate_wind;
+        local->transaction.done   = afr_fallocate_done;
+        local->transaction.unwind = afr_fallocate_unwind;
+
+        local->transaction.main_frame = frame;
+
+        local->transaction.start = local->cont.fallocate.offset;
+        local->transaction.len = 0;
+
+        /* fallocate can modify the file size */
+        op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+        if (op_ret < 0) {
+                /* afr_transaction reports failure as a negated errno */
+                op_errno = -op_ret;
+                goto out;
+        }
op_ret = 0;
out:
- if (op_ret == -1) {
+        if (op_ret < 0) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL,
+                                  NULL, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * fallocate entry point.
+ *
+ * Validates the arguments, fails with EIO when afr_is_split_brain() reports
+ * the fd's inode as split-brained, performs the quorum check, allocates and
+ * initialises the local on the caller's frame, records mode/offset/len and
+ * a reference to fd, and hands over to afr_do_fallocate().
+ *
+ * afr_do_fallocate() unwinds the frame itself when it fails, so its return
+ * value is not examined here.  Failures before that point unwind the frame
+ * with -1 and op_errno.  Always returns 0.
+ */
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+               off_t offset, size_t len, dict_t *xdata)
+{
+        afr_private_t *priv     = NULL;
+        afr_local_t   *local    = NULL;
+        int            ret      = -1;
+        /* VALIDATE_OR_GOTO is not handed op_errno, so this initial value is
+         * what reaches the caller on a validation failure; use EINVAL, as
+         * afr_fsetxattr does, rather than 0. */
+        int            op_errno = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        if (afr_is_split_brain (this, fd->inode)) {
+                op_errno = EIO;
+                goto out;
+        }
+        QUORUM_CHECK(fallocate,out);
+
+        AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+        local = frame->local;
+
+        ret = afr_local_init (local, priv, &op_errno);
+        if (ret < 0)
+                goto out;
+
+        local->cont.fallocate.mode = mode;
+        local->cont.fallocate.offset = offset;
+        local->cont.fallocate.len = len;
+
+        local->fd = fd_ref (fd);
+
+        afr_open_fd_fix (fd, this);
+
+        afr_do_fallocate (frame, this);
+
+        ret = 0;
+out:
+        if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, op_ret, op_errno);
+                /* no transaction frame is ever created in this function,
+                 * so there is nothing to destroy before unwinding */
+                AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ discard */
+
+/*
+ * Unwind callback for the discard transaction.
+ *
+ * Detaches transaction.main_frame from the local while holding frame->lock
+ * and, if a main frame was still attached, unwinds it with the stored
+ * op_ret/op_errno and the pre/post iatt kept in cont.inode_wfop (xdata is
+ * NULL).  A later call finds the pointer already cleared and does nothing.
+ * Always returns 0.
+ */
+static int
+afr_discard_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (discard, main_frame, local->op_ret,
+                          local->op_errno,
+                          &local->cont.inode_wfop.prebuf,
+                          &local->cont.inode_wfop.postbuf,
+                          NULL);
+        return 0;
+}
+
+/*
+ * Reply handler for each discard wind; the cookie carries the child index.
+ *
+ * The read child is looked up from the fd's inode before the lock is taken.
+ * Under frame->lock the handler notes whether the read child has answered,
+ * hands the reply to __inode_write_fop_cbk(), and requests an early unwind
+ * once success_count has reached priv->wait_count and the read child has
+ * returned.  After the lock is released the unwind callback runs if
+ * requested, and the transaction is resumed once afr_frame_return()
+ * reports 0.  Always returns 0.
+ */
+static int
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            need_unwind = 0;
+        int            read_child  = 0;
+
+        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+        LOCK (&frame->lock);
+        {
+                if (child_index == read_child)
+                        local->read_child_returned = _gf_true;
+
+                __inode_write_fop_cbk (frame, child_index, read_child, this,
+                                       &op_ret, &op_errno, prebuf, postbuf,
+                                       xdata);
+
+                need_unwind = (local->success_count >= priv->wait_count)
+                              && local->read_child_returned;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        if (afr_frame_return (frame) == 0) {
+                local->transaction.resume (frame, this);
}
return 0;
}
+
+/*
+ * transaction.fop hook for discard.
+ *
+ * Winds discard, with the fd, offset and len saved in the local, to every
+ * child whose transaction.pre_op[] entry is set.  If
+ * afr_pre_op_done_children_count() reports no such child, the transaction is
+ * resumed straight away.  Otherwise local->call_count is set to that count
+ * and the loop ends as soon as that many winds have been issued.
+ * Always returns 0.
+ */
+static int
+afr_discard_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            i       = 0;
+
+        pending = afr_pre_op_done_children_count (local->transaction.pre_op,
+                                                  priv->child_count);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->transaction.pre_op[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_discard_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->discard,
+                                   local->fd,
+                                   local->cont.discard.offset,
+                                   local->cont.discard.len,
+                                   NULL);
+
+                /* stop immediately after the last expected wind */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+/*
+ * Completion hook installed as transaction.done by afr_do_discard().
+ * Calls the transaction's unwind callback for this frame and then destroys
+ * the frame.  Always returns 0.
+ */
+static int
+afr_discard_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+/*
+ * Starts the discard transaction for a frame prepared by afr_discard().
+ *
+ * Copies the frame, moves the local from the caller's frame to the copy,
+ * installs the discard wind/done/unwind hooks and launches an
+ * AFR_DATA_TRANSACTION starting at the requested offset.
+ *
+ * On failure the transaction frame (if any) is destroyed and the caller's
+ * frame is unwound here with a negative op_ret and op_errno, so callers must
+ * not unwind it again.  Always returns 0.
+ */
+static int
+afr_do_discard (call_frame_t *frame, xlator_t *this)
+{
+        call_frame_t *transaction_frame = NULL;
+        afr_local_t  *local             = NULL;
+        int           op_ret            = -1;
+        int           op_errno          = 0;
+
+        local = frame->local;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                /* report the failure with a real errno instead of 0,
+                 * matching afr_removexattr's handling of copy_frame */
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        /* the local now belongs to the transaction frame */
+        transaction_frame->local = local;
+        frame->local = NULL;
+
+        local->op = GF_FOP_DISCARD;
+
+        local->transaction.fop    = afr_discard_wind;
+        local->transaction.done   = afr_discard_done;
+        local->transaction.unwind = afr_discard_unwind;
+
+        local->transaction.main_frame = frame;
+
+        local->transaction.start = local->cont.discard.offset;
+        local->transaction.len = 0;
+
+        op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+        if (op_ret < 0) {
+                /* afr_transaction reports failure as a negated errno */
+                op_errno = -op_ret;
+                goto out;
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret < 0) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL,
+                                  NULL, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * discard entry point.
+ *
+ * Validates the arguments, fails with EIO when afr_is_split_brain() reports
+ * the fd's inode as split-brained, performs the quorum check, allocates and
+ * initialises the local on the caller's frame, records offset/len and a
+ * reference to fd, and hands over to afr_do_discard().
+ *
+ * afr_do_discard() unwinds the frame itself when it fails, so its return
+ * value is not examined here.  Failures before that point unwind the frame
+ * with -1 and op_errno.  Always returns 0.
+ */
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             size_t len, dict_t *xdata)
+{
+        afr_private_t *priv     = NULL;
+        afr_local_t   *local    = NULL;
+        int            ret      = -1;
+        /* VALIDATE_OR_GOTO is not handed op_errno, so this initial value is
+         * what reaches the caller on a validation failure; use EINVAL, as
+         * afr_fsetxattr does, rather than 0. */
+        int            op_errno = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        if (afr_is_split_brain (this, fd->inode)) {
+                op_errno = EIO;
+                goto out;
+        }
+        QUORUM_CHECK(discard, out);
+
+        AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+        local = frame->local;
+
+        ret = afr_local_init (local, priv, &op_errno);
+        if (ret < 0)
+                goto out;
+
+        local->cont.discard.offset = offset;
+        local->cont.discard.len = len;
+
+        local->fd = fd_ref (fd);
+
+        afr_open_fd_fix (fd, this);
+
+        afr_do_discard(frame, this);
+
+        ret = 0;
+out:
+        if (ret < 0) {
+                /* no transaction frame is ever created in this function,
+                 * so there is nothing to destroy before unwinding */
+                AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+        }
+
+        return 0;
+}
+
+
+/* {{{ zerofill */
+
+/*
+ * Unwind callback for the zerofill transaction.
+ *
+ * Detaches transaction.main_frame from the local while holding frame->lock
+ * and, if a main frame was still attached, unwinds it with the stored
+ * op_ret/op_errno and the pre/post iatt kept in cont.zerofill (xdata is
+ * NULL).  A later call finds the pointer already cleared and does nothing.
+ * Always returns 0.
+ */
+static int
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret,
+                          local->op_errno,
+                          &local->cont.zerofill.prebuf,
+                          &local->cont.zerofill.postbuf,
+                          NULL);
+        return 0;
+}
+
+/*
+ * Reply handler for each zerofill wind; the cookie carries the child index.
+ *
+ * The read child is looked up from the fd's inode before the lock is taken.
+ * Under frame->lock:
+ *   - the read child's reply is noted in local->read_child_returned;
+ *   - a reply that afr_fop_failed() classifies as failed is reported via
+ *     afr_transaction_fop_failed();
+ *   - for op_ret != -1, the first success stores op_ret, and the first
+ *     success or a reply from the read child stores prebuf/postbuf in
+ *     cont.zerofill; success_count is bumped and an early unwind is
+ *     requested once it reaches priv->wait_count and the read child has
+ *     returned;
+ *   - local->op_errno is overwritten with this reply's op_errno.
+ * After the lock is released the unwind callback runs if requested, and the
+ * transaction is resumed once afr_frame_return() reports 0.
+ * Always returns 0.
+ */
+static int
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        afr_local_t   *local         = frame->local;
+        afr_private_t *priv          = this->private;
+        int            child_index   = (long) cookie;
+        int            need_unwind   = 0;
+        int            read_child    = 0;
+        int            first_success = 0;
+
+        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+        LOCK (&frame->lock);
+        {
+                if (child_index == read_child)
+                        local->read_child_returned = _gf_true;
+
+                if (afr_fop_failed (op_ret, op_errno))
+                        afr_transaction_fop_failed (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        first_success = (local->success_count == 0);
+
+                        if (first_success)
+                                local->op_ret = op_ret;
+
+                        if (first_success || (child_index == read_child)) {
+                                local->cont.zerofill.prebuf  = *prebuf;
+                                local->cont.zerofill.postbuf = *postbuf;
+                        }
+
+                        local->success_count++;
+
+                        need_unwind =
+                                (local->success_count >= priv->wait_count)
+                                && local->read_child_returned;
+                }
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        if (afr_frame_return (frame) == 0)
+                local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+/*
+ * transaction.fop hook for zerofill.
+ *
+ * Winds zerofill, with the fd, offset and len saved in the local, to every
+ * child whose transaction.pre_op[] entry is set.  If
+ * afr_pre_op_done_children_count() reports no such child, the transaction is
+ * resumed straight away.  Otherwise local->call_count is set to that count
+ * and the loop ends as soon as that many winds have been issued.
+ * Always returns 0.
+ */
+static int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            i       = 0;
+
+        pending = afr_pre_op_done_children_count (local->transaction.pre_op,
+                                                  priv->child_count);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->transaction.pre_op[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->zerofill,
+                                   local->fd,
+                                   local->cont.zerofill.offset,
+                                   local->cont.zerofill.len,
+                                   NULL);
+
+                /* stop immediately after the last expected wind */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+/*
+ * Completion hook installed as transaction.done by afr_do_zerofill().
+ * Calls the transaction's unwind callback for this frame and then destroys
+ * the frame.  Always returns 0.
+ */
+static int
+afr_zerofill_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+/*
+ * Starts the zerofill transaction for a frame prepared by afr_zerofill().
+ *
+ * Copies the frame, moves the local from the caller's frame to the copy,
+ * installs the zerofill wind/done/unwind hooks and launches an
+ * AFR_DATA_TRANSACTION starting at the requested offset.
+ *
+ * On failure the transaction frame (if any) is destroyed and the caller's
+ * frame is unwound here with a negative op_ret and op_errno, so callers must
+ * not unwind it again.  Always returns 0.
+ */
+static int
+afr_do_zerofill(call_frame_t *frame, xlator_t *this)
+{
+        call_frame_t *transaction_frame = NULL;
+        afr_local_t  *local             = NULL;
+        int           op_ret            = -1;
+        int           op_errno          = 0;
+
+        local = frame->local;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                /* report the failure with a real errno instead of 0,
+                 * matching afr_removexattr's handling of copy_frame */
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        /* the local now belongs to the transaction frame */
+        transaction_frame->local = local;
+        frame->local = NULL;
+
+        local->op = GF_FOP_ZEROFILL;
+
+        local->transaction.fop    = afr_zerofill_wind;
+        local->transaction.done   = afr_zerofill_done;
+        local->transaction.unwind = afr_zerofill_unwind;
+
+        local->transaction.main_frame = frame;
+
+        local->transaction.start = local->cont.zerofill.offset;
+        local->transaction.len = 0;
+
+        op_ret = afr_transaction (transaction_frame, this,
+                                  AFR_DATA_TRANSACTION);
+        if (op_ret < 0) {
+                /* afr_transaction reports failure as a negated errno */
+                op_errno = -op_ret;
+                goto out;
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret < 0) {
+                if (transaction_frame) {
+                        AFR_STACK_DESTROY (transaction_frame);
+                }
+                AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL,
+                                  NULL, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * zerofill entry point.
+ *
+ * Validates the arguments, fails with EIO when afr_is_split_brain() reports
+ * the fd's inode as split-brained, performs the quorum check, allocates and
+ * initialises the local on the caller's frame, records offset/len and a
+ * reference to fd, and hands over to afr_do_zerofill().
+ *
+ * afr_do_zerofill() unwinds the frame itself when it fails, so its return
+ * value is not examined here.  Failures before that point unwind the frame
+ * with -1 and op_errno.  Always returns 0.
+ */
+int
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              size_t len, dict_t *xdata)
+{
+        afr_private_t *priv     = NULL;
+        afr_local_t   *local    = NULL;
+        int            ret      = -1;
+        /* VALIDATE_OR_GOTO is not handed op_errno, so this initial value is
+         * what reaches the caller on a validation failure; use EINVAL, as
+         * afr_fsetxattr does, rather than 0. */
+        int            op_errno = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        if (afr_is_split_brain (this, fd->inode)) {
+                op_errno = EIO;
+                goto out;
+        }
+        QUORUM_CHECK(zerofill, out);
+
+        AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+        local = frame->local;
+
+        ret = afr_local_init (local, priv, &op_errno);
+        if (ret < 0) {
+                goto out;
+        }
+        local->cont.zerofill.offset = offset;
+        local->cont.zerofill.len = len;
+
+        local->fd = fd_ref (fd);
+
+        afr_open_fd_fix (fd, this);
+
+        afr_do_zerofill(frame, this);
+
+        ret = 0;
+out:
+        if (ret < 0) {
+                /* no transaction frame is ever created in this function,
+                 * so there is nothing to destroy before unwinding */
+                AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL,
+                                  NULL, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index f9aa7bd36..8e93ca44a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_WRITE_H__
@@ -22,51 +13,70 @@
int32_t
afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, dict_t *xdata);
int32_t
afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid);
+ loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
int
afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid);
+ fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
int32_t
afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode);
+ fd_t *fd, mode_t mode, dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref);
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset);
+ loc_t *loc, off_t offset, dict_t *xdata);
int32_t
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset);
+ fd_t *fd, off_t offset, dict_t *xdata);
int32_t
afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2]);
+ loc_t *loc, struct timespec tv[2], dict_t *xdata);
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid);
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid);
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
int32_t
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags);
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
int32_t
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 17651add9..060d78f35 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
@@ -31,8 +22,69 @@
#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
#define LOCKED_LOWER 0x2 /* for lower path */
+/*
+ * Lock-tracing wrappers.  Each macro reads the xlator's private data and
+ * calls the matching afr_trace_*() function only when the corresponding
+ * trace flag (inodelk_trace or entrylk_trace) is set.
+ *
+ * The `this` argument is parenthesised before it is dereferenced so that an
+ * expression argument binds correctly.  The `;` after `while (0)` is kept
+ * because call sites are not visible here and may rely on it; as a result
+ * these macros cannot be used as the unbraced body of an if that has an
+ * else.
+ */
+#define AFR_TRACE_INODELK_IN(frame, this, params ...)           \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->inodelk_trace)                      \
+                        break;                                  \
+                afr_trace_inodelk_in (frame, this, params);     \
+        } while (0);
+
+#define AFR_TRACE_INODELK_OUT(frame, this, params ...)          \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->inodelk_trace)                      \
+                        break;                                  \
+                afr_trace_inodelk_out (frame, this, params);    \
+        } while (0);
+
+#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...)           \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->entrylk_trace)                      \
+                        break;                                  \
+                afr_trace_entrylk_in (frame, this, params);     \
+        } while (0);
+
+#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...)          \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->entrylk_trace)                      \
+                        break;                                  \
+                afr_trace_entrylk_out (frame, this, params);    \
+        } while (0);
+
+/*
+ * Ordering function for afr_entry_lockee_t elements, shaped like a qsort()
+ * comparator (presumably used to sort lockees; the caller is not visible
+ * here).
+ *
+ * Elements are ordered by the gfid obtained from loc_gfid() first; for equal
+ * gfids an element without a basename sorts before one with a basename, and
+ * two basenames are ordered by strcmp().
+ *
+ * Returns -1, 0 or 1.  Equal keys compare as 0 so that cmp(a, b) and
+ * cmp(b, a) agree with each other, as sort routines require of their
+ * comparator; previously equal keys yielded -1 in both directions.
+ */
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+        const afr_entry_lockee_t *r1    = l1;
+        const afr_entry_lockee_t *r2    = l2;
+        int                       ret   = 0;
+        uuid_t                    gfid1 = {0};
+        uuid_t                    gfid2 = {0};
+
+        loc_gfid ((loc_t*)&r1->loc, gfid1);
+        loc_gfid ((loc_t*)&r2->loc, gfid2);
+        ret = uuid_compare (gfid1, gfid2);
+        /*Entrylks with NULL basename are the 'smallest'*/
+        if (ret == 0) {
+                if (!r1->basename && !r2->basename)
+                        return 0;
+                if (!r1->basename)
+                        return -1;
+                if (!r2->basename)
+                        return 1;
+                ret = strcmp (r1->basename, r2->basename);
+        }
+
+        if (ret < 0)
+                return -1;
+        if (ret > 0)
+                return 1;
+        return 0;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
static uint64_t afr_lock_number = 1;
@@ -57,12 +109,13 @@ afr_set_lock_number (call_frame_t *frame, xlator_t *this)
}
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this)
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
{
gf_log (this->name, GF_LOG_TRACE,
"Setting lk-owner=%llu",
- (unsigned long long) (unsigned long)frame->root);
- frame->root->lk_owner = (uint64_t) (unsigned long)frame->root;
+ (unsigned long long) (unsigned long)lk_owner);
+
+ set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
}
static int
@@ -88,8 +141,7 @@ is_afr_lock_selfheal (afr_local_t *local)
}
int32_t
-internal_lock_count (call_frame_t *frame, xlator_t *this,
- afr_fd_ctx_t *fd_ctx)
+internal_lock_count (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -99,17 +151,9 @@ internal_lock_count (call_frame_t *frame, xlator_t *this,
local = frame->local;
priv = this->private;
- if (fd_ctx) {
- GF_ASSERT (local->fd);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i])
- ++call_count;
- }
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
}
return call_count;
@@ -117,7 +161,7 @@ internal_lock_count (call_frame_t *frame, xlator_t *this,
static void
afr_print_inodelk (char *str, int size, int cmd,
- struct gf_flock *flock, uint64_t owner)
+ struct gf_flock *flock, gf_lkowner_t *owner)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -165,11 +209,11 @@ afr_print_inodelk (char *str, int size, int cmd,
}
snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
cmd_str, type_str, (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner);
+ lkowner_utoa (owner));
}
@@ -185,11 +229,11 @@ afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
void
afr_print_entrylk (char *str, int size, const char *basename,
- uint64_t owner)
+ gf_lkowner_t *owner)
{
- snprintf (str, size, "Basename=%s, lk-owner=%llu",
+ snprintf (str, size, "Basename=%s, lk-owner=%s",
basename ? basename : "<nul>",
- (unsigned long long)owner);
+ lkowner_utoa (owner));
}
static void
@@ -243,27 +287,20 @@ afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int op_ret, int op_errno, int32_t child_index)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->inodelk_trace) {
- return;
- }
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
@@ -272,39 +309,31 @@ afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict,
- lockee,
+ verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
(unsigned long long) int_lock->lock_number);
}
static void
-afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int32_t cmd, int32_t child_index)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->inodelk_trace) {
- return;
- }
- afr_print_inodelk (lock, 256, cmd, flock, frame->root->lk_owner);
+ afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
@@ -319,20 +348,21 @@ afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t child_index)
+ int32_t cookie)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
+ int child_index = 0;
+ int lockee_no = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -340,36 +370,41 @@ afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_entrylk (lock, 256, basename, frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
static void
-afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename, int op_ret,
- int op_errno, int32_t child_index)
+afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int op_ret, int op_errno, int32_t cookie)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int lockee_no = 0;
+ int child_index = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -377,20 +412,25 @@ afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
verdict,
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
@@ -443,6 +483,47 @@ is_afr_lock_transaction (afr_local_t *local)
return ret;
}
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count)
+{
+ int ret = -1;
+
+ loc_copy (&lockee->loc, loc);
+ lockee->basename = (basename)? gf_strdup (basename): NULL;
+ if (basename && !lockee->basename)
+ goto out;
+
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC (child_count,
+ sizeof (*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ loc_wipe (&int_lock->lockee[i].loc);
+ if (int_lock->lockee[i].basename)
+ GF_FREE (int_lock->lockee[i].basename);
+ if (int_lock->lockee[i].locked_nodes)
+ GF_FREE (int_lock->lockee[i].locked_nodes);
+ }
+
+ return;
+}
+
static int
initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
{
@@ -460,8 +541,13 @@ initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
int_lock->lock_op_ret = -1;
int_lock->lock_op_errno = 0;
- for (i = 0; i < priv->child_count; i++) {
- int_lock->entry_locked_nodes[i] = 0;
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset (int_lock->lockee[i].locked_nodes, 0,
+ sizeof (*int_lock->lockee[i].locked_nodes) *
+ priv->child_count);
}
return 0;
@@ -473,19 +559,23 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
- int i = 0;
+ afr_inodelk_t *inodelk = NULL;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ inodelk->lock_count = 0;
+ int_lock->lk_attempted_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ memset (int_lock->locked_nodes, 0,
+ sizeof (*int_lock->locked_nodes) * priv->child_count);
return 0;
}
@@ -495,7 +585,7 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
{
int ret = 0;
- ret = strcmp (l1->path, l2->path);
+ ret = uuid_compare (l1->inode->gfid, l2->inode->gfid);
if (ret == 0)
ret = strcmp (b1, b2);
@@ -507,6 +597,18 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
}
int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
+{
+ int call_count = 0;
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
+
+ return call_count;
+}
+
+int
afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
{
@@ -524,7 +626,7 @@ afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
/* FIXME: What if UNLOCK fails */
static int32_t
afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
@@ -550,23 +652,37 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
local = frame->local;
+ int_lock = &local->internal_lock;
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ op_errno, child_index);
+
+ priv = this->private;
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unlock failed %s",
- local->loc.path, strerror (op_errno));
+ gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s "
+ "with lock owner %s", local->loc.path,
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner));
}
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
@@ -576,21 +692,30 @@ static int
afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
int call_count = 0;
int i = 0;
+ int piggyback = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
+ full_flock.l_type = F_UNLCK;
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
priv->child_count);
int_lock->lk_call_count = call_count;
@@ -602,55 +727,107 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
goto out;
}
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
for (i = 0; i < priv->child_count; i++) {
- if (int_lock->inode_locked_nodes[i] & LOCKED_YES) {
- if (local->fd) {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
+ continue;
- if (!--call_count)
- break;
+ if (local->fd) {
+ flock_use = &flock;
+ if (!local->transaction.eager_lock[i]) {
+ goto wind;
+ }
- } else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
+ piggyback = 0;
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_piggyback[i]) {
+ fd_ctx->lock_piggyback[i]--;
+ piggyback = 1;
+ } else {
+ fd_ctx->lock_acquired[i]--;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+ if (piggyback) {
+ afr_unlock_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
if (!--call_count)
break;
-
+ continue;
}
- }
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, flock_use, F_SETLK,
+ i);
- }
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+
+ if (!--call_count)
+ break;
+
+ } else {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, &flock, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
+ if (!--call_count)
+ break;
+ }
+ }
out:
return 0;
}
static int32_t
afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = 0;
+ int lockee_no = 0;
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+ priv = this->private;
+ lockee_no = (int)((long) cookie) / priv->child_count;
+ child_index = (int) ((long) cookie) % priv->child_count;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (int) ((long)cookie));
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unlock failed on %d, reason: %s",
+ local->loc.path, child_index, strerror (op_errno));
+ }
+
+ int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
return 0;
}
@@ -658,24 +835,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int
afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int copies = 0;
+ int i = -1;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
+ call_count = afr_lockee_locked_nodes_count (int_lock);
- call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes,
- priv->child_count);
int_lock->lk_call_count = call_count;
if (!call_count){
@@ -685,18 +860,23 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->entry_locked_nodes[i] & LOCKED_YES) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_no = i / copies;
+ index = i % copies;
+ if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
if (!--call_count)
break;
@@ -710,17 +890,21 @@ out:
static int32_t
afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int done = 0;
- int child_index = (long) cookie;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long) cookie;
+ int child_index = 0;
+ int lockee_no = 0;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
+
+ child_index = ((int)cky) % priv->child_count;
+ lockee_no = ((int)cky) / priv->child_count;
LOCK (&frame->lock);
{
@@ -732,13 +916,13 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"please load features/locks xlator on server");
local->op_ret = op_ret;
int_lock->lock_op_ret = op_ret;
- done = 1;
}
- local->child_up[child_index] = 0;
local->op_errno = op_errno;
int_lock->lock_op_errno = op_errno;
}
+
+ int_lock->lk_attempted_count++;
}
UNLOCK (&frame->lock);
@@ -747,11 +931,17 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
afr_unlock (frame, this);
} else {
if (op_ret == 0) {
- int_lock->locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->lock_count++;
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ } else {
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lock_count++;
+ }
}
- afr_lock_blocking (frame, this, child_index + 1);
+ afr_lock_blocking (frame, this, cky + 1);
}
return 0;
@@ -759,104 +949,26 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
static int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
-
- local->op_ret = op_ret;
- }
-
- local->child_up[child_index] = 0;
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (op_ret != 0) {
- afr_unlock (frame, this);
- goto out;
- } else {
- int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER;
- int_lock->lock_count++;
- }
-
- /* The lower path has been locked. Now lock the higher path */
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, higher_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
-out:
- return 0;
-}
-
-static int32_t
afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long)cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
@@ -864,6 +976,7 @@ static int
afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -874,18 +987,16 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
break;
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
- memcpy (int_lock->entry_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->entrylk_lock_count = int_lock->lock_count;
+ /*entrylk_count is being used in both non-blocking and blocking
+ * modes */
break;
}
@@ -893,28 +1004,67 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
}
+static inline gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+ afr_transaction_type trans_type)
+{
+ gf_boolean_t is_entrylk = _gf_false;
+
+ if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
+ int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) {
+
+ is_entrylk = _gf_true;
+
+ } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
+ (trans_type == AFR_ENTRY_TRANSACTION ||
+ trans_type == AFR_ENTRY_RENAME_TRANSACTION)) {
+
+ is_entrylk = _gf_true;
+
+ } else {
+ is_entrylk = _gf_false;
+ }
+
+ return is_entrylk;
+}
+
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+ if (!local->child_up[child_index])
+ return _gf_false;
+
+ return _gf_true;
+}
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
struct gf_flock flock = {0,};
uint64_t ctx = 0;
int ret = 0;
+ int child_index = 0;
+ int lockee_no = 0;
+ gf_boolean_t is_entrylk = _gf_false;
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
if (local->fd) {
ret = fd_ctx_get (local->fd, this, &ctx);
@@ -933,46 +1083,26 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index]
- || !fd_ctx->opened_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
}
- if ((child_index == priv->child_count) &&
- int_lock->lock_count == 0) {
-
- gf_log (this->name, GF_LOG_INFO,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
+ (!is_entrylk && int_lock->lock_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to lock on even one child");
- afr_copy_locked_nodes (frame, this);
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- afr_unlock(frame, this);
+ afr_copy_locked_nodes (frame, this);
- return 0;
+ afr_unlock(frame, this);
+ return 0;
+ }
}
- if ((child_index == priv->child_count)
- || (int_lock->lock_count ==
- afr_up_children_count (priv->child_count,
- local->child_up))) {
-
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
/* we're done locking */
gf_log (this->name, GF_LOG_DEBUG,
@@ -985,12 +1115,18 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
if (local->fd) {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -998,11 +1134,12 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock);
+ int_lock->domain, local->fd,
+ F_SETLKW, &flock, NULL);
} else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1010,79 +1147,50 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLKW, &flock, NULL);
}
break;
case AFR_ENTRY_RENAME_TRANSACTION:
- {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, lower_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- break;
- }
-
case AFR_ENTRY_TRANSACTION:
+ /*Accounting for child_index increments on 'down'
+ *and 'fd-less' children */
+
if (local->fd) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ cookie);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
} else {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_IN (frame, this,
+ AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, local->transaction.basename,
child_index);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
}
break;
}
return 0;
-
-
}
int32_t
@@ -1091,6 +1199,7 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int up_count = 0;
priv = this->private;
local = frame->local;
@@ -1104,6 +1213,11 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
+ up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count
+ = (int_lock->lockee_count *
+ up_count);
initialize_entrylk_variables (frame, this);
break;
}
@@ -1115,59 +1229,68 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
static int32_t
afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = 0;
int child_index = (long) cookie;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ copies = priv->child_count;
+ index = child_index % copies;
+ lockee_no = child_index / copies;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
op_errno, (long) cookie);
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0 ) {
+ if (op_errno == ENOSYS) {
/* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_no].locked_nodes[index] |= \
+ LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ }
- local->child_up[child_index] = 0;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->entrylk_lock_count++;
+ call_count = --int_lock->lk_call_count;
}
+ UNLOCK (&frame->lock);
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last locking reply received");
- /* all locks successfull. Proceed to call FOP */
+ /* all locks successful. Proceed to call FOP */
if (int_lock->entrylk_lock_count ==
- afr_up_children_count (priv->child_count, local->child_up)) {
+ int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
int_lock->lock_cbk (frame, this);
}
- /* Not all locks were successfull. Unlock and try locking
+ /* Not all locks were successful. Unlock and try locking
again, this time with serially blocking locks */
else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1184,31 +1307,26 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int32_t call_count = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
int i = 0;
- uint64_t ctx = 0;
- int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
initialize_entrylk_variables (frame, this);
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
gf_log (this->name, GF_LOG_INFO,
"unable to get fd ctx for fd=%p",
local->fd);
@@ -1218,13 +1336,13 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
return -1;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- call_count = internal_lock_count (frame, this, fd_ctx);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
if (!call_count) {
gf_log (this->name, GF_LOG_INFO,
@@ -1235,41 +1353,52 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking entrylk calls only on up children
and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
this->name, local->fd,
- basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+ if (!--call_count)
+ break;
}
}
} else {
- GF_ASSERT (loc);
-
- call_count = internal_lock_count (frame, this, NULL);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name, loc, basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
if (!--call_count)
break;
-
}
}
}
@@ -1279,58 +1408,75 @@ out:
int32_t
afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = 0;
int child_index = (long) cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- afr_trace_inodelk_out (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
LOCK (&frame->lock);
{
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on "
+ "server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+ } else {
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
+
+ if (local->transaction.eager_lock &&
+ local->transaction.eager_lock[child_index] &&
+ local->fd) {
+ /* eager lock on an fd: op_ret tells how the lock was obtained */
+ if (op_ret == 1) {
+ /* piggybacked */
+ } else if (op_ret == 0) {
+ /* lock acquired from server */
+ fd_ctx->lock_acquired[child_index]++;
+ }
+ }
+ }
+
call_count = --int_lock->lk_call_count;
}
UNLOCK (&frame->lock);
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- local->child_up[child_index] = 0;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
- }
-
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last inode locking reply received");
- /* all locks successfull. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- afr_up_children_count (priv->child_count, local->child_up)) {
+ /* all locks successful. Proceed to call FOP */
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
int_lock->lock_cbk (frame, this);
}
- /* Not all locks were successfull. Unlock and try locking
+ /* Not all locks were successful. Unlock and try locking
again, this time with serially blocking locks */
else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1348,29 +1494,35 @@ int
afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
- int32_t call_count = 0;
- uint64_t ctx = 0;
- int i = 0;
- int ret = 0;
- struct gf_flock flock = {0,};
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int piggyback = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+
+ full_flock.l_type = inodelk->flock.l_type;
initialize_inodelk_variables (frame, this);
if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
gf_log (this->name, GF_LOG_INFO,
"unable to get fd ctx for fd=%p",
local->fd);
@@ -1380,14 +1532,14 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
ret = -1;
goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- call_count = internal_lock_count (frame, this, fd_ctx);
+ call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
if (!call_count) {
gf_log (this->name, GF_LOG_INFO,
@@ -1399,258 +1551,77 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking inodelk calls only on up children
and where the fd has been opened */
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i]) {
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
-
- if (!--call_count)
- break;
+ if (!local->child_up[i])
+ continue;
+ flock_use = &flock;
+ if (!local->transaction.eager_lock_on) {
+ goto wind;
}
- }
- } else {
- call_count = internal_lock_count (frame, this, NULL);
- int_lock->lk_call_count = call_count;
+ piggyback = 0;
+ local->transaction.eager_lock[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLK, i);
+ afr_set_delayed_post_op (frame, this);
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_acquired[i]) {
+ fd_ctx->lock_piggyback[i]++;
+ piggyback = 1;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+ if (piggyback) {
+ /* (op_ret == 1) => indicate piggybacked lock */
+ afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
if (!--call_count)
break;
-
+ continue;
}
- }
- }
-
-out:
- return ret;
-}
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, flock_use, F_SETLK, i);
-static int
-__is_lower_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER)
- count++;
- }
-
- return count;
-
-}
-
-static int
-__is_higher_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->locked_nodes[i] & LOCKED_YES)
- count++;
- }
-
- return count;
-
-}
-
-static int
-afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = __is_lower_locked (frame, this);
- int_lock->lk_call_count = call_count;
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
+ if (!--call_count)
+ break;
+ }
+ } else {
+ call_count = internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLK, i);
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
(void *) (long) i,
priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
if (!--call_count)
break;
-
}
}
-
out:
- return 0;
-
-}
-
-
-static int
-afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.done (frame, this);
- return 0;
-}
-
-static int
-afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_higher_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking higher");
- int_lock->lk_basename = higher_name;
- int_lock->lk_loc = higher;
- int_lock->lock_cbk = afr_post_unlock_higher_cbk;
-
- afr_unlock_entrylk (frame, this);
- } else
- local->transaction.done (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
-
- if (__is_lower_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking lower");
- int_lock->lk_basename = lower_name;
- int_lock->lk_loc = lower;
- int_lock->lock_cbk = afr_post_unlock_lower_cbk;
-
- afr_unlock_lower_entrylk (frame, this);
- } else
- afr_post_unlock_lower_cbk (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_transaction (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- return (local->transaction.type ==
- AFR_ENTRY_RENAME_TRANSACTION);
-
+ return ret;
}
int32_t
@@ -1664,10 +1635,8 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
if (is_afr_lock_transaction (local))
afr_unlock_inodelk (frame, this);
else
- if (!afr_rename_transaction (frame, this))
- afr_unlock_entrylk (frame, this);
- else
- afr_rename_unlock (frame, this);
+ afr_unlock_entrylk (frame, this);
+
} else {
if (is_afr_lock_selfheal (local))
afr_unlock_inodelk (frame, this);
@@ -1836,10 +1805,12 @@ out:
int32_t
afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock);
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
int32_t
afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -1863,7 +1834,7 @@ afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(void *) (long) source_child,
priv->children[source_child],
priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
+ local->fd, F_GETLK_FD, &flock, NULL);
return 0;
@@ -1891,7 +1862,7 @@ afr_recover_lock (call_frame_t *frame, xlator_t *this,
(void *) (long) lock_recovery_child,
priv->children[lock_recovery_child],
priv->children[lock_recovery_child]->fops->lk,
- local->fd, F_SETLK, flock);
+ local->fd, F_SETLK, flock, NULL);
return 0;
}
@@ -1909,7 +1880,8 @@ is_afr_lock_eol (struct gf_flock *lock)
int32_t
afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
if (op_ret) {
gf_log (this->name, GF_LOG_INFO,
@@ -1969,7 +1941,7 @@ afr_lock_recovery (call_frame_t *frame, xlator_t *this)
(void *) (long) source_child,
priv->children[source_child],
priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
+ local->fd, F_GETLK_FD, &flock, NULL);
out:
return ret;
@@ -1989,7 +1961,7 @@ afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
fdctx = (afr_fd_ctx_t *) (long) tmp;
- fdctx->opened_on[child_index] = 1;
+ fdctx->opened_on[child_index] = AFR_FD_OPENED;
out:
return ret;
@@ -1997,7 +1969,8 @@ out:
int32_t
afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
int32_t child_index = (long )cookie;
int ret = 0;
@@ -2049,7 +2022,12 @@ afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
GF_ASSERT (local && local->fd);
ret = fd_ctx_get (local->fd, this, &tmp);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context of fd",
+ uuid_utoa (local->fd->inode->gfid));
fdctx = (afr_fd_ctx_t *) (long) tmp;
+ /* TODO: instead we should return from the function */
GF_ASSERT (fdctx);
child_index = local->lock_recovery_child;
@@ -2064,8 +2042,7 @@ afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
(void *)(long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->open,
- &loc, fdctx->flags, local->fd,
- fdctx->wbflags);
+ &loc, fdctx->flags, local->fd, NULL);
return 0;
}
@@ -2083,7 +2060,7 @@ is_fd_opened (fd_t *fd, int32_t child_index)
fdctx = (afr_fd_ctx_t *) (long) tmp;
- if (fdctx->opened_on[child_index])
+ if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
ret = 1;
out:
@@ -2093,13 +2070,14 @@ out:
int
afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
{
- call_frame_t *frame = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
afr_locked_fd_t *tmp = NULL;
- int ret = 0;
+ int ret = -1;
struct list_head locks_list = {0,};
+ int32_t op_errno = 0;
priv = this->private;
@@ -2113,15 +2091,10 @@ afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
goto out;
}
- local = GF_CALLOC (1, sizeof (*local),
- gf_afr_mt_afr_local_t);
- if (!local) {
- ret = -1;
- goto out;
- }
-
- AFR_LOCAL_INIT (local, priv);
- if (!local) {
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
ret = -1;
goto out;
}
@@ -2159,5 +2132,43 @@ afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
}
out:
+ if ((ret < 0) && frame)
+ AFR_STACK_DESTROY (frame);
+ return ret;
+}
+
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
+{
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
+
+ src_local = src->local;
+ src_lock = &src_local->internal_lock;
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
+ goto out;
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
+ }
+
+ dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
+ dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
+out:
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 98e865740..73594f265 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,7 +17,6 @@
enum gf_afr_mem_types_ {
gf_afr_mt_iovec = gf_common_mt_end + 1,
gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_local_t,
gf_afr_mt_afr_private_t,
gf_afr_mt_int32_t,
gf_afr_mt_char,
@@ -43,6 +33,18 @@ enum gf_afr_mem_types_ {
gf_afr_mt_pump_priv,
gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
+ gf_afr_fd_paused_call_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_stats_t,
+ gf_afr_mt_shd_crawl_event_t,
+ gf_afr_mt_uint64_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 306f5a85a..643a5d692 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -55,16 +46,84 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+int
+afr_stale_child_up (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int up = -1;
+
+ priv = this->private;
+
+ if (!local->fresh_children)
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out;
+
+ afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
+ if (priv->child_count == afr_get_children_count (local->fresh_children,
+ priv->child_count))
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ if (afr_is_child_present (local->fresh_children,
+ priv->child_count, i))
+ continue;
+ up = i;
+ break;
+ }
+out:
+ return up;
+}
+
+void
+afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ inode_t *inode = NULL;
+ int st_child = -1;
+ char reason[64] = {0};
+
+ local = frame->local;
+ sh = &local->self_heal;
+ inode = local->fd->inode;
+
+ if (!IA_ISREG (inode->ia_type))
+ goto out;
+
+ st_child = afr_stale_child_up (local, this);
+ if (st_child < 0)
+ goto out;
+
+ sh->do_data_self_heal = _gf_true;
+ sh->do_metadata_self_heal = _gf_true;
+ sh->do_gfid_self_heal = _gf_true;
+ sh->do_missing_entry_self_heal = _gf_true;
+
+ snprintf (reason, sizeof (reason), "stale subvolume %d detected",
+ st_child);
+ afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
+ reason, NULL, NULL);
+out:
+ return;
+}
int
afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = frame->local;
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
+ local->fd, xdata);
return 0;
}
@@ -72,15 +131,15 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_open_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_local_t * local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
int call_count = -1;
int child_index = (long) cookie;
+ afr_private_t *priv = NULL;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
@@ -93,32 +152,13 @@ afr_open_cbk (call_frame_t *frame, void *cookie,
local->op_ret = op_ret;
local->success_count++;
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set fd ctx for fd=%p", fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
+ ret = afr_child_fd_ctx_set (this, fd, child_index,
+ local->cont.open.flags);
+ if (ret) {
+ local->op_ret = -1;
local->op_errno = -ret;
goto unlock;
}
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
- fd_ctx->flags = local->cont.open.flags;
- fd_ctx->wbflags = local->cont.open.wbflags;
}
}
unlock:
@@ -131,29 +171,30 @@ unlock:
&& (local->op_ret >= 0)) {
STACK_WIND (frame, afr_open_ftruncate_cbk,
this, this->fops->ftruncate,
- fd, 0);
+ fd, 0, NULL);
} else {
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, xdata);
}
}
return 0;
}
-
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
int i = 0;
int ret = -1;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
int32_t wind_flags = flags & (~O_TRUNC);
+ //We can't let truncation happen outside a transaction.
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -162,6 +203,10 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
priv = this->private;
+ if (flags & (O_CREAT|O_TRUNC)) {
+ QUORUM_CHECK(open,out);
+ }
+
if (afr_is_split_brain (this, loc->inode)) {
/* self-heal failed */
gf_log (this->name, GF_LOG_WARNING,
@@ -170,21 +215,17 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- frame->local = local;
call_count = local->call_count;
-
loc_copy (&local->loc, loc);
local->cont.open.flags = flags;
- local->cont.open.wbflags = wbflags;
local->fd = fd_ref (fd);
@@ -193,462 +234,149 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->open,
- loc, wind_flags, fd, wbflags);
+ loc, wind_flags, fd, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, xdata);
return 0;
}
-
int
-afr_openfd_sh_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- uint64_t ctx = 0;
- int ret = 0;
- int call_count = 0;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context, %p", fd);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
-
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened successfully on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
- }
- }
-out:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- int_lock->lock_cbk = local->transaction.done;
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-
-static int
-__unopened_count (int child_count, unsigned int *opened_on, unsigned char *child_up)
-{
- int i = 0;
- int count = 0;
-
- for (i = 0; i < child_count; i++) {
- if (!opened_on[i] && child_up[i])
- count++;
- }
-
- return count;
-}
-
-
-int
-afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
- int abandon = 0;
- int ret = 0;
- int i = 0;
- int call_count = 0;
+ int call_count = 0;
+ int child_index = (long) cookie;
- priv = this->private;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
- /*
- * Some subvolumes might have come up on which we never
- * opened this fd in the first place. Re-open fd's on those
- * subvolumes now.
- */
+ if (op_ret >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open %s "
+ "on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ }
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context %p (%s)",
- local->fd, local->loc.path);
- abandon = 1;
+ "failed to get fd context, %p", local->fd);
goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
LOCK (&local->fd->lock);
{
- call_count = __unopened_count (priv->child_count,
- fd_ctx->opened_on,
- local->child_up);
- for (i = 0; i < priv->child_count; i++) {
- fd_ctx->pre_op_done[i] = 0;
- fd_ctx->pre_op_piggyback[i] = 0;
+ if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
}
UNLOCK (&local->fd->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "fd not open on any subvolume %p (%s)",
- local->fd, local->loc.path);
- abandon = 1;
- goto out;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!fd_ctx->opened_on[i] && local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening fd for %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_openfd_sh_open_cbk,
- (void *)(long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc, fd_ctx->flags, local->fd,
- fd_ctx->wbflags);
-
- if (!--call_count)
- break;
- }
- }
-
out:
- if (abandon)
- local->transaction.resume (frame, this);
-
- return 0;
-}
-
-
-static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
-{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
-
- if ((!fd) || (!fd->inode))
- return -1;
-
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
-
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
-
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
-
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
-
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
- }
-
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
-
- return 0;
-}
-
-
-int
-afr_openfd_sh (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- char sh_type_str[256] = {0,};
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (local->loc.path);
- /* forcibly trigger missing-entries self-heal */
-
- sh->need_missing_entry_self_heal = _gf_true;
- sh->need_gfid_self_heal = _gf_true;
- sh->data_lock_held = _gf_true;
- sh->need_data_self_heal = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_openfd_sh_unwind;
-
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
- gf_log (this->name, GF_LOG_INFO, "%s self-heal triggered. "
- "path: %s, reason: Replicate up down flush, data lock is held",
- sh_type_str, local->loc.path);
-
- afr_self_heal (frame, this, local->fd->inode);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
return 0;
}
-
-int
-afr_openfd_flush_done (call_frame_t *frame, xlator_t *this)
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- int _ret = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
priv = this->private;
- local = frame->local;
-
- LOCK (&local->fd->lock);
- {
- _ret = __fd_ctx_get (local->fd, this, &ctx);
- if (_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context %p (%s)",
- local->fd, local->loc.path);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->down_count = priv->down_count;
- fd_ctx->up_count = priv->up_count;
- }
-out:
- UNLOCK (&local->fd->lock);
-
- afr_local_transaction_cleanup (local, this);
-
- gf_log (this->name, GF_LOG_TRACE,
- "The up/down flush is over");
-
- fd_unref (local->fd);
- local->openfd_flush_cbk (frame, this);
-
- return 0;
-}
-
-
-
-int
-afr_openfd_xaction (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_local_t * local = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- local = frame->local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_openfd_sh;
- local->transaction.done = afr_openfd_flush_done;
-
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- gf_log (this->name, GF_LOG_TRACE,
- "doing up/down flush on fd=%p", fd);
-
- afr_transaction (frame, this, AFR_DATA_TRANSACTION);
-
-out:
- return 0;
-}
-
-
-int
-afr_openfd_xaction_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context %p (%s)",
- fd, local->loc.path);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
+ if (!afr_is_fd_fixable (fd) || !need_open || !need_open_count)
+ goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened successfully on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ ret = -1;
+ goto out;
}
-out:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_openfd_xaction (frame, this, local->fd);
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
}
- return 0;
-}
-
-
-int
-afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int no_open = 0;
- int ret = 0;
- int i = 0;
- int call_count = 0;
-
- priv = this->private;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
-
- /*
- * If the file is already deleted while the fd is open, no need to
- * perform the openfd flush, call the flush_cbk and get out.
- */
- ret = afr_prepare_loc (frame, fd);
- if (ret < 0) {
- local->openfd_flush_cbk (frame, this);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- /*
- * Some subvolumes might have come up on which we never
- * opened this fd in the first place. Re-open fd's on those
- * subvolumes now.
- */
-
- local->fd = fd_ref (fd);
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context %p (%s)",
- fd, local->loc.path);
- no_open = 1;
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
goto out;
- }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ local->fd = fd_ref (fd);
+ local->call_count = need_open_count;
- LOCK (&local->fd->lock);
- {
- call_count = __unopened_count (priv->child_count,
- fd_ctx->opened_on,
- local->child_up);
- }
- UNLOCK (&local->fd->lock);
+ gf_log (this->name, GF_LOG_DEBUG, "need open count: %zd",
+ need_open_count);
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "fd not open on any subvolume %p (%s)",
- fd, local->loc.path);
- no_open = 1;
- goto out;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!need_open[i])
+ continue;
- local->call_count = call_count;
+ if (IA_IFDIR == fd->inode->ia_type) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for dir %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
- for (i = 0; i < priv->child_count; i++) {
- if (!fd_ctx->opened_on[i] && local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening fd for %s on subvolume %s",
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ &local->loc, local->fd,
+ NULL);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for file %s on subvolume %s",
local->loc.path, priv->children[i]->name);
- STACK_WIND_COOKIE (frame, afr_openfd_xaction_open_cbk,
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
(void *)(long) i,
priv->children[i],
priv->children[i]->fops->open,
- &local->loc, fd_ctx->flags, fd,
- fd_ctx->wbflags);
-
- if (!--call_count)
- break;
+ &local->loc,
+ fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
- }
+ }
+ op_errno = 0;
+ ret = 0;
out:
- if (no_open)
- afr_openfd_xaction (frame, this, fd);
-
- return 0;
+ if (op_errno)
+ ret = -1; //For handling ALLOC_OR_GOTO
+ if (ret && frame)
+ AFR_STACK_DESTROY (frame);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index 04b388fe0..83846f152 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -1,23 +1,15 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <openssl/md5.h>
#include "glusterfs.h"
#include "afr.h"
#include "xlator.h"
@@ -33,7 +25,6 @@
#include "compat-errno.h"
#include "compat.h"
#include "byte-order.h"
-#include "md5.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
@@ -44,303 +35,289 @@
This file contains the various self-heal algorithms
*/
+static int
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
+static int
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno);
+static int
+sh_destroy_frame (call_frame_t *frame, xlator_t *this)
+{
+ if (!frame)
+ goto out;
-/*
- The "full" algorithm. Copies the entire file from
- source to sinks.
-*/
-
+ AFR_STACK_DESTROY (frame);
+out:
+ return 0;
+}
static void
-sh_full_private_cleanup (call_frame_t *frame, xlator_t *this)
+sh_private_cleanup (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
local = frame->local;
sh = &local->self_heal;
sh_priv = sh->private;
-
- if (sh_priv)
- GF_FREE (sh_priv);
+ GF_FREE (sh_priv);
}
-
-static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call);
-
static int
-sh_full_loop_driver_done (call_frame_t *frame, xlator_t *this)
+sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- sh_full_private_cleanup (frame, this);
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_INFO,
- "full self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "full self-heal completed on %s",
- local->loc.path);
+ int writes = 0;
+ int i = 0;
- local->self_heal.algo_completion_cbk (frame, this);
+ for (i = 0; i < child_count; i++) {
+ if (write_needed[i])
+ writes++;
}
- return 0;
-}
-
-static int
-sh_full_loop_return (call_frame_t *rw_frame, xlator_t *this, off_t offset)
-{
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- AFR_STACK_DESTROY (rw_frame);
-
- sh_full_loop_driver (sh_frame, this, _gf_false);
- return 0;
+ return writes;
}
static int
-sh_full_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
+ call_frame_t *last_loop_frame)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
- int call_count = 0;
-
- priv = this->private;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int32_t total_blocks = 0;
+ int32_t diff_blocks = 0;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+ if (sh_priv) {
+ total_blocks = sh_priv->total_blocks;
+ diff_blocks = sh_priv->diff_blocks;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- rw_sh->offset - op_ret);
+ sh_private_cleanup (sh_frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ GF_ASSERT (!last_loop_frame);
+ //loop_finish should have happened and the old_loop should be NULL
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-heal aborting on %s",
+ local->loc.path);
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
+ local->self_heal.algo_abort_cbk (sh_frame, this);
+ } else {
+ GF_ASSERT (last_loop_frame);
+ if (diff_blocks == total_blocks) {
+ gf_log (this->name, GF_LOG_DEBUG, "full self-heal "
+ "completed on %s",local->loc.path);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "diff self-heal on %s: completed. "
+ "(%d blocks of %d were different (%.2f%%))",
+ local->loc.path, diff_blocks, total_blocks,
+ ((diff_blocks * 1.0)/total_blocks) * 100);
}
- }
- UNLOCK (&sh_frame->lock);
-
- call_count = afr_frame_return (rw_frame);
- if (call_count == 0) {
- sh_full_loop_return (rw_frame, this, rw_sh->offset - op_ret);
+ sh->old_loop_frame = last_loop_frame;
+ local->self_heal.algo_completion_cbk (sh_frame, this);
}
return 0;
}
-
-static int
-sh_full_read_cbk (call_frame_t *rw_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
- off_t offset = (long) cookie;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- call_count = sh->active_sinks;
-
- rw_local->call_count = call_count;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, offset);
+ if (!loop_frame)
+ goto out;
- if (op_ret <= 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "read from %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[sh->source]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- sh_full_loop_return (rw_frame, this, offset);
- return 0;
+ loop_local = loop_frame->local;
+ if (loop_local) {
+ loop_sh = &loop_local->self_heal;
}
- rw_sh->offset += op_ret;
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- /* the iter function depends on the
- sh->offset already being updated
- above
- */
- gf_log (this->name, GF_LOG_DEBUG,
- "block has all 0 filled");
- sh_full_loop_return (rw_frame, this, offset);
- goto out;
- }
+ if (loop_sh && loop_sh->data_lock_held) {
+ afr_sh_data_unlock (loop_frame, this, this->name,
+ sh_destroy_frame);
+ } else {
+ sh_destroy_frame (loop_frame, this);
}
+out:
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- /* this is a sink, so write to it */
+static int
+sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
- STACK_WIND_COOKIE (rw_frame, sh_full_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count, offset,
- iobref);
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- if (!--call_count)
- break;
- }
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
-out:
+ gf_log (this->name, GF_LOG_DEBUG, "Acquired lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ loop_sh->data_lock_held = _gf_true;
+ loop_sh->sh_data_algo_start (loop_frame, this);
return 0;
}
+static int
+sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
+{
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+ sh_frame = loop_sh->sh_frame;
+
+ gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+ sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
+ return 0;
+}
static int
-sh_full_read_write (call_frame_t *frame, xlator_t *this, off_t offset)
+sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
+ call_frame_t *old_loop_frame, call_frame_t **loop_frame)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- afr_self_heal_t *sh = NULL;
- call_frame_t *rw_frame = NULL;
- int32_t op_errno = 0;
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_ASSERT (sh_frame);
+ GF_ASSERT (loop_frame);
+
+ *loop_frame = NULL;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ new_loop_frame = copy_frame (sh_frame);
+ if (!new_loop_frame)
+ goto out;
+ //We want the frame to have same lk_owner as sh_frame
+ //so that locks translator allows conflicting locks
+ new_loop_local = afr_self_heal_local_init (local, this);
+ if (!new_loop_local)
+ goto out;
+ new_loop_frame->local = new_loop_local;
- rw_frame = copy_frame (frame);
- if (!rw_frame)
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->sources = memdup (sh->sources,
+ priv->child_count * sizeof (*sh->sources));
+ if (!new_loop_sh->sources)
+ goto out;
+ new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
+ sizeof (*new_loop_sh->write_needed),
+ gf_afr_mt_char);
+ if (!new_loop_sh->write_needed)
+ goto out;
+ new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LENGTH,
+ gf_afr_mt_uint8_t);
+ if (!new_loop_sh->checksum)
goto out;
+ new_loop_sh->inode = inode_ref (sh->inode);
+ new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
+ new_loop_sh->source = sh->source;
+ new_loop_sh->active_sinks = sh->active_sinks;
+ new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
+ new_loop_sh->file_has_holes = sh->file_has_holes;
+ new_loop_sh->old_loop_frame = old_loop_frame;
+ new_loop_sh->sh_frame = sh_frame;
+ *loop_frame = new_loop_frame;
+ return 0;
+out:
+ sh_destroy_frame (new_loop_frame, this);
+ return -ENOMEM;
+}
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
+static int
+sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
+ call_frame_t *old_loop_frame)
+{
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ int ret = 0;
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
+ GF_ASSERT (sh_frame);
- rw_sh->offset = offset;
- rw_sh->sh_frame = frame;
+ local = sh_frame->local;
+ sh = &local->self_heal;
- STACK_WIND_COOKIE (rw_frame, sh_full_read_cbk,
- (void *) (long) offset,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh->block_size,
- offset);
+ ret = sh_loop_frame_create (sh_frame, this, old_loop_frame,
+ &new_loop_frame);
+ if (ret)
+ goto out;
+ new_loop_local = new_loop_frame->local;
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->offset = offset;
+ new_loop_sh->block_size = sh->block_size;
+ afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
+ _gf_true, this->name, sh_loop_lock_success, sh_loop_lock_failure);
return 0;
-
out:
- sh->op_failed = 1;
-
- sh_full_loop_driver (frame, this, _gf_false);
-
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ if (old_loop_frame)
+ sh_loop_finish (old_loop_frame, this);
+ sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
return 0;
}
-
static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call)
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
{
- afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_self_heal_t * sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
gf_boolean_t is_driver_done = _gf_false;
blksize_t block_size = 0;
- off_t offset = 0;
int loop = 0;
+ off_t offset = 0;
+ afr_private_t *priv = NULL;
priv = this->private;
- local = frame->local;
+ local = sh_frame->local;
sh = &local->self_heal;
sh_priv = sh->private;
LOCK (&sh_priv->lock);
{
- if (_gf_false == is_first_call)
+ if (!is_first_call)
sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((sh->op_failed == 0) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
+ offset = sh_priv->offset;
+ block_size = sh->block_size;
+ while ((!sh->eof_reached) &&
+ (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
+ (sh_priv->loops_running < priv->data_self_heal_window_size)
&& (sh_priv->offset < sh->file_size)) {
loop++;
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- sh_priv->offset += sh->block_size;
+ sh_priv->offset += block_size;
sh_priv->loops_running++;
- if (_gf_false == is_first_call)
+ if (!is_first_call)
break;
-
}
if (0 == sh_priv->loops_running) {
is_driver_done = _gf_true;
@@ -348,349 +325,242 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_
}
UNLOCK (&sh_priv->lock);
+ if (0 == loop) {
+ //loop finish does unlock, but the erasing of the pending
+ //xattrs needs to happen before that so do not finish the loop
+ if (is_driver_done &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ goto driver_done;
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ }
+
+ //If we have more loops to form we should finish previous loop after
+ //the next loop lock
while (loop--) {
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
// op failed in other loop, stop spawning more loops
- sh_full_loop_driver (frame, this, _gf_false);
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ sh_loop_driver (sh_frame, this, _gf_false, NULL);
} else {
- sh_full_read_write (frame, this, offset);
+ gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
+ "for offset %"PRId64, offset);
+
+ sh_loop_start (sh_frame, this, offset, old_loop_frame);
+ old_loop_frame = NULL;
offset += block_size;
}
}
+driver_done:
if (is_driver_done) {
- sh_full_loop_driver_done (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_algo_full (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
- if (!sh_priv)
- goto out;
-
- LOCK_INIT (&sh_priv->lock);
-
- sh->private = sh_priv;
-
- local->call_count = 0;
-
- sh_full_loop_driver (frame, this, _gf_true);
-out:
- return 0;
-}
-
-
-/*
- * The "diff" algorithm. Copies only those blocks whose checksums
- * don't match with those of source.
- */
-
-
-static void
-sh_diff_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- if (sh_priv->loops[i]) {
- if (sh_priv->loops[i]->write_needed)
- GF_FREE (sh_priv->loops[i]->write_needed);
-
- if (sh_priv->loops[i]->checksum)
- GF_FREE (sh_priv->loops[i]->checksum);
-
- GF_FREE (sh_priv->loops[i]);
- }
- }
-
- if (sh_priv) {
- if (sh_priv->loops)
- GF_FREE (sh_priv->loops);
-
- GF_FREE (sh_priv);
- }
-
-
-}
-
-
-static uint32_t
-__make_cookie (int loop_index, int child_index)
-{
- uint32_t ret = ((loop_index << 16) | child_index);
- return ret;
-}
-
-
-static int
-__loop_index (uint32_t cookie)
-{
- return ((cookie & 0xFFFF0000) >> 16);
-}
-
-
-static int
-__child_index (uint32_t cookie)
-{
- return (cookie & 0x0000FFFF);
-}
-
-
-static void
-sh_diff_loop_state_reset (struct sh_diff_loop_state *loop_state, int child_count)
-{
- loop_state->active = _gf_false;
-// loop_state->offset = 0;
-
- memset (loop_state->write_needed,
- 0, sizeof (*loop_state->write_needed) * child_count);
-
- memset (loop_state->checksum,
- 0, MD5_DIGEST_LEN * child_count);
-}
-
-
-static int
-sh_diff_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
+ sh_loop_driver_done (sh_frame, this, old_loop_frame);
}
-
- return writes;
-}
-
-
-static int
-sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
-
- sh_diff_private_cleanup (frame, this);
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal on %s: completed. "
- "(%d blocks of %d were different (%.2f%%))",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
-
- local->self_heal.algo_completion_cbk (frame, this);
- }
-
return 0;
}
static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
- gf_boolean_t is_first_call,
- struct sh_diff_loop_state *loop_state);
-
-static int
-sh_diff_loop_return (call_frame_t *rw_frame, xlator_t *this,
- struct sh_diff_loop_state *loop_state)
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
afr_local_t * sh_local = NULL;
afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
- sh_priv = sh->private;
- gf_log (this->name, GF_LOG_TRACE,
- "loop for offset %"PRId64" returned", loop_state->offset);
+ if (loop_frame) {
+ loop_local = loop_frame->local;
+ if (loop_local)
+ loop_sh = &loop_local->self_heal;
+ if (loop_sh)
+ gf_log (this->name, GF_LOG_TRACE, "loop for offset "
+ "%"PRId64" returned", loop_sh->offset);
+ }
- AFR_STACK_DESTROY (rw_frame);
+ if (op_ret == -1) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ if (loop_frame) {
+ sh_loop_finish (loop_frame, this);
+ loop_frame = NULL;
+ }
+ }
- sh_diff_loop_driver (sh_frame, this, _gf_false, loop_state);
+ sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
return 0;
}
-
static int
-sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
+sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
call_frame_t *sh_frame = NULL;
afr_local_t * sh_local = NULL;
afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- struct sh_diff_loop_state *loop_state = NULL;
int call_count = 0;
int child_index = 0;
- int loop_index = 0;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
- sh_priv = sh->private;
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
+ child_index = (long) cookie;
gf_log (this->name, GF_LOG_TRACE,
"wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- loop_state->offset);
+ op_ret, sh_local->loc.path, child_index, loop_sh->offset);
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
- }
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "write to %s failed on subvolume %s (%s)",
+ sh_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (loop_sh, op_errno);
+ } else if (op_ret < loop_local->cont.writev.vector->iov_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "incomplete write to %s on subvolume %s "
+ "(expected %lu, returned %d)", sh_local->loc.path,
+ priv->children[child_index]->name,
+ loop_local->cont.writev.vector->iov_len, op_ret);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
- UNLOCK (&sh_frame->lock);
- call_count = afr_frame_return (rw_frame);
+ call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
- sh_diff_loop_return (rw_frame, this, loop_state);
+ iobref_unref(loop_local->cont.writev.iobref);
+
+ sh_loop_return (sh_frame, this, loop_frame,
+ loop_sh->op_ret, loop_sh->op_errno);
}
return 0;
}
+static void
+sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame,
+ afr_private_t *priv)
+{
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ if (!strcmp (sh->algo->name, "diff"))
+ return;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ /* full self-heal guarantees there exists atleast 1 file with size 0
+ * That means for other files we can preserve holes that come after
+ * its size before 'trim'
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->write_needed[i] &&
+ ((loop_sh->offset + 1) > sh->buf[i].ia_size))
+ loop_sh->write_needed[i] = 0;
+ }
+}
static int
-sh_diff_read_cbk (call_frame_t *rw_frame, void *cookie,
+sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int loop_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
- uint32_t wcookie = 0;
int i = 0;
int call_count = 0;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t * sh = NULL;
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
-
- call_count = sh_diff_number_of_writes_needed (loop_state->write_needed,
- priv->child_count);
-
- rw_local->call_count = call_count;
gf_log (this->name, GF_LOG_TRACE,
"read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, loop_state->offset);
-
- if ((op_ret <= 0) ||
- (call_count == 0)) {
- sh_diff_loop_return (rw_frame, this, loop_state);
+ op_ret, loop_local->loc.path, loop_sh->offset);
- return 0;
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
+ "for %s reason :%s", sh->source,
+ sh_local->loc.path, strerror (errno));
+ } else {
+ sh->eof_reached = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
+ sh_local->loc.path);
+ }
+ sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
+ goto out;
}
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- gf_log (this->name, GF_LOG_DEBUG, "0 filled block");
- sh_diff_loop_return (rw_frame, this, loop_state);
- goto out;
- }
+ if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0)
+ sh_prune_writes_needed (sh_frame, loop_frame, priv);
+
+ call_count = sh_number_of_writes_needed (loop_sh->write_needed,
+ priv->child_count);
+ if (call_count == 0) {
+ sh_loop_return (sh_frame, this, loop_frame, 0, 0);
+ goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (loop_state->write_needed[i]) {
- wcookie = __make_cookie (loop_index, i);
+ loop_local->call_count = call_count;
- STACK_WIND_COOKIE (rw_frame, sh_diff_write_cbk,
- (void *) (long) wcookie,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count,
- loop_state->offset, iobref);
+ /*
+ * We only really need the request size at the moment, but the buffer
+ * is required if we want to issue a retry in the event of a short write.
+ * Therefore, we duplicate the vector and ref the iobref here...
+ */
+ loop_local->cont.writev.vector = iov_dup(vector, count);
+ loop_local->cont.writev.iobref = iobref_ref(iobref);
- if (!--call_count)
- break;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!loop_sh->write_needed[i])
+ continue;
+ STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ loop_sh->healing_fd, vector, count,
+ loop_sh->offset, 0, iobref, NULL);
+
+ if (!--call_count)
+ break;
}
out:
@@ -699,101 +569,79 @@ out:
static int
-sh_diff_read (call_frame_t *rw_frame, xlator_t *this,
- int loop_index)
+sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- struct sh_diff_loop_state *loop_state = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- uint32_t cookie = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_state = sh_priv->loops[loop_index];
-
- cookie = __make_cookie (loop_index, sh->source);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_read_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh_priv->block_size,
- loop_state->offset);
+ STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->readv,
+ loop_sh->healing_fd, loop_sh->block_size,
+ loop_sh->offset, 0, NULL);
return 0;
}
static int
-sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
+sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- int loop_index = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
int child_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
int call_count = 0;
int i = 0;
int write_needed = 0;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
sh_priv = sh->private;
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
-
- loop_state = sh_priv->loops[loop_index];
+ child_index = (long) cookie;
if (op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"checksum on %s failed on subvolume %s (%s)",
sh_local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
-
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
- memcpy (loop_state->checksum + child_index * MD5_DIGEST_LEN,
- strong_checksum,
- MD5_DIGEST_LEN);
+ memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
+ strong_checksum, MD5_DIGEST_LENGTH);
}
- call_count = afr_frame_return (rw_frame);
+ call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
for (i = 0; i < priv->child_count; i++) {
if (sh->sources[i] || !sh_local->child_up[i])
continue;
- if (memcmp (loop_state->checksum + (i * MD5_DIGEST_LEN),
- loop_state->checksum + (sh->source * MD5_DIGEST_LEN),
- MD5_DIGEST_LEN)) {
+ if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LENGTH),
+ loop_sh->checksum + (sh->source * MD5_DIGEST_LENGTH),
+ MD5_DIGEST_LENGTH)) {
/*
Checksums differ, so this block
must be written to this sink
@@ -802,9 +650,9 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG,
"checksum on subvolume %s at offset %"
PRId64" differs from that on source",
- priv->children[i]->name, loop_state->offset);
+ priv->children[i]->name, loop_sh->offset);
- write_needed = loop_state->write_needed[i] = 1;
+ write_needed = loop_sh->write_needed[i] = 1;
}
}
@@ -816,272 +664,171 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
}
UNLOCK (&sh_priv->lock);
- if (write_needed && !sh->op_failed) {
- sh_diff_read (rw_frame, this, loop_index);
+ if (write_needed &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ sh_loop_read (loop_frame, this);
} else {
- sh->offset += sh_priv->block_size;
-
- sh_diff_loop_return (rw_frame, this, loop_state);
+ sh_loop_return (sh_frame, this, loop_frame,
+ op_ret, op_errno);
}
}
return 0;
}
-
-static int
-sh_diff_find_unused_loop (afr_sh_algo_diff_private_t *sh_priv, int max)
-{
- int i = 0;
-
- LOCK (&sh_priv->lock);
- {
- for (i = 0; i < max; i++) {
- if (sh_priv->loops[i]->active == _gf_false) {
- sh_priv->loops[i]->active = _gf_true;
- break;
- }
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (i == max) {
- gf_log ("[sh-diff]", GF_LOG_ERROR,
- "no free loops found! This shouldn't happen. Please"
- " report this to gluster-devel@nongnu.org");
- }
-
- return i;
-}
-
-
static int
-sh_diff_checksum (call_frame_t *frame, xlator_t *this, off_t offset)
+sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- call_frame_t *rw_frame = NULL;
- uint32_t cookie = 0;
- int loop_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
- int32_t op_errno = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- rw_frame = copy_frame (frame);
- if (!rw_frame)
- goto out;
-
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
-
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
-
- rw_sh->offset = sh->offset;
- rw_sh->sh_frame = frame;
-
- call_count = sh->active_sinks + 1; /* sinks and source */
-
- rw_local->call_count = call_count;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int call_count = 0;
+ int i = 0;
- loop_index = sh_diff_find_unused_loop (sh_priv, priv->data_self_heal_window_size);
-
- loop_state = sh_priv->loops[loop_index];
- loop_state->offset = offset;
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- /* we need to send both the loop index and child index,
- so squeeze them both into a 32-bit number */
+ call_count = loop_sh->active_sinks + 1; /* sinks and source */
- cookie = __make_cookie (loop_index, sh->source);
+ loop_local->call_count = call_count;
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->rchecksum,
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size, NULL);
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
continue;
- cookie = __make_cookie (loop_index, i);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size, NULL);
if (!--call_count)
break;
}
return 0;
-
-out:
- sh->op_failed = 1;
-
- sh_diff_loop_driver (frame, this, _gf_false, loop_state);
-
- return 0;
}
-
static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
- gf_boolean_t is_first_call,
- struct sh_diff_loop_state *loop_state)
+sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- int loop = 0;
- off_t offset = 0;
- char sh_type_str[256] = {0,};
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- afr_self_heal_type_str_get(sh, sh_type_str, sizeof(sh_type_str));
-
- LOCK (&sh_priv->lock);
- {
- if (loop_state)
- sh_diff_loop_state_reset (loop_state, priv->child_count);
- if (_gf_false == is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh_priv->block_size;
- while ((0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- sh_priv->offset += sh_priv->block_size;
- sh_priv->loops_running++;
-
- if (_gf_false == is_first_call)
- break;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- while (loop--) {
- if (sh->op_failed) {
- // op failed in other loop, stop spawning more loops
- sh_diff_loop_driver (frame, this, _gf_false, NULL);
- } else {
- sh_diff_checksum (frame, this, offset);
- offset += block_size;
- }
- }
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- if (is_driver_done) {
- sh_diff_loop_driver_done (frame, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
+ continue;
+ loop_sh->write_needed[i] = 1;
}
+ sh_loop_read (loop_frame, this);
return 0;
}
-
-int
-afr_sh_algo_diff (call_frame_t *frame, xlator_t *this)
+afr_sh_algo_private_t*
+afr_sh_priv_init ()
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ afr_sh_algo_private_t *sh_priv = NULL;
sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
gf_afr_mt_afr_private_t);
if (!sh_priv)
- goto err;
-
- sh_priv->block_size = this->ctx->page_size;
-
- sh->private = sh_priv;
+ goto out;
LOCK_INIT (&sh_priv->lock);
+out:
+ return sh_priv;
+}
- local->call_count = 0;
-
- sh_priv->loops = GF_CALLOC (priv->data_self_heal_window_size,
- sizeof (*sh_priv->loops),
- gf_afr_mt_sh_diff_loop_state);
- if (!sh_priv->loops)
- goto err;
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- sh_priv->loops[i] = GF_CALLOC (1, sizeof (*sh_priv->loops[i]),
- gf_afr_mt_sh_diff_loop_state);
- if (!sh_priv->loops[i])
- goto err;
-
- sh_priv->loops[i]->checksum = GF_CALLOC (priv->child_count,
- MD5_DIGEST_LEN, gf_afr_mt_uint8_t);
- if (!sh_priv->loops[i]->checksum)
- goto err;
-
- sh_priv->loops[i]->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*sh_priv->loops[i]->write_needed),
- gf_afr_mt_char);
- if (!sh_priv->loops[i]->write_needed)
- goto err;
+int
+afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
+{
+ afr_local_t *dst_local = NULL;
+ afr_self_heal_t *dst_sh = NULL;
+ afr_local_t *src_local = NULL;
+ afr_self_heal_t *src_sh = NULL;
+ int ret = -1;
+
+ dst_local = dst->local;
+ dst_sh = &dst_local->self_heal;
+ src_local = src->local;
+ src_sh = &src_local->self_heal;
+ GF_ASSERT (src_sh->data_lock_held);
+ GF_ASSERT (!dst_sh->data_lock_held);
+ ret = afr_lk_transfer_datalock (dst, src, dom, child_count);
+ if (ret)
+ return ret;
+ src_sh->data_lock_held = _gf_false;
+ dst_sh->data_lock_held = _gf_true;
+ return 0;
+}
- }
+int
+afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
+ afr_sh_algo_fn sh_data_algo_start)
+{
+ call_frame_t *first_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
- sh_diff_loop_driver (frame, this, _gf_true, NULL);
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+ sh->sh_data_algo_start = sh_data_algo_start;
+ local->call_count = 0;
+ ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
+ if (ret)
+ goto out;
+ ret = afr_sh_transfer_lock (first_loop_frame, sh_frame, this->name,
+ priv->child_count);
+ if (ret)
+ goto out;
+ sh->private = afr_sh_priv_init ();
+ if (!sh->private) {
+ ret = -1;
+ goto out;
+ }
+ sh_loop_driver (sh_frame, this, _gf_true, first_loop_frame);
+ ret = 0;
+out:
+ if (ret) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ sh_loop_driver_done (sh_frame, this, NULL);
+ }
return 0;
-err:
- if (sh_priv) {
- if (sh_priv->loops) {
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- if (sh_priv->loops[i]->write_needed)
- GF_FREE (sh_priv->loops[i]->write_needed);
- if (sh_priv->loops[i]->checksum)
- GF_FREE (sh_priv->loops[i]->checksum);
- if (sh_priv->loops[i])
- GF_FREE (sh_priv->loops[i]);
- }
-
- GF_FREE (sh_priv->loops);
- }
+}
- GF_FREE (sh_priv);
- }
+int
+afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
return 0;
}
+int
+afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
+ return 0;
+}
struct afr_sh_algorithm afr_self_heal_algorithms[] = {
{.name = "full", .fn = afr_sh_algo_full},
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
index 2790dbc6a..6b20789b1 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
@@ -1,26 +1,16 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
#define __AFR_SELF_HEAL_ALGORITHM_H__
-
typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
xlator_t *this);
@@ -30,31 +20,13 @@ struct afr_sh_algorithm {
};
extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-
typedef struct {
gf_lock_t lock;
unsigned int loops_running;
off_t offset;
-} afr_sh_algo_full_private_t;
-
-struct sh_diff_loop_state {
- off_t offset;
- unsigned char *write_needed;
- uint8_t *checksum;
- gf_boolean_t active;
-};
-
-typedef struct {
- size_t block_size;
-
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
int32_t total_blocks;
int32_t diff_blocks;
-
- struct sh_diff_loop_state **loops;
-} afr_sh_algo_diff_private_t;
+} afr_sh_algo_private_t;
#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 15b659fa8..ef92b4205 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "glusterfs.h"
@@ -27,6 +18,52 @@
#include "afr-self-heal.h"
#include "pump.h"
+#define ADD_FMT_STRING(msg, off, sh_str, status, print_log) \
+ do { \
+ if (AFR_SELF_HEAL_NOT_ATTEMPTED != status) { \
+ off += snprintf (msg + off, sizeof (msg) - off, \
+ " "sh_str" self heal %s,", \
+ get_sh_completion_status (status));\
+ print_log = 1; \
+ } \
+ } while (0)
+
+#define ADD_FMT_STRING_SYNC(msg, off, sh_str, status, print_log) \
+ do { \
+ if (AFR_SELF_HEAL_SYNC_BEGIN == status || \
+ AFR_SELF_HEAL_FAILED == status) { \
+ off += snprintf (msg + off, sizeof (msg) - off, \
+ " "sh_str" self heal %s,", \
+ get_sh_completion_status (status));\
+ print_log = 1; \
+ } \
+ } while (0)
+
+
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
+ memset (sh->parentbufs, 0,
+ sizeof (*sh->parentbufs) * priv->child_count);
+ memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
+ memset (sh->locked_nodes, 0,
+ sizeof (*sh->locked_nodes) * priv->child_count);
+ sh->active_sinks = 0;
+
+ afr_reset_xattr (sh->xattr, priv->child_count);
+}
+
//Intersection[child]=1 if child is part of intersection
void
afr_children_intersection_get (int32_t *set1, int32_t *set2,
@@ -57,20 +94,28 @@ afr_sh_select_source (int sources[], int child_count)
return -1;
}
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int active_sinks = 0;
-/**
- * sink_count - return number of sinks in sources array
- */
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
-int
-afr_sh_sink_count (int sources[], int child_count)
-{
- int i = 0;
- int sinks = 0;
- for (i = 0; i < child_count; i++)
- if (!sources[i])
- sinks++;
- return sinks;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->sources[i] == 0 && local->child_up[i] == 1) {
+ active_sinks++;
+ sh->success[i] = 1;
+ } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
+ sh->success[i] = 1;
+ }
+ }
+ sh->active_sinks = active_sinks;
}
int
@@ -89,8 +134,8 @@ void
afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
{
sh->op_ret = -1;
- if (afr_error_more_important (sh->op_errno, op_errno))
- sh->op_errno = op_errno;
+ sh->op_errno = afr_most_important_error(sh->op_errno, op_errno,
+ _gf_false);
}
void
@@ -112,13 +157,85 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
}
sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_TRACE,
- "pending_matrix: %s", buf);
+ gf_log (this->name, GF_LOG_DEBUG, "pending_matrix: %s", buf);
}
GF_FREE (buf);
}
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *ptr = NULL;
+ int i = 0;
+ int j = 0;
+ int child_count = priv->child_count;
+ char *matrix_begin = "[ [ ";
+ char *matrix_end = "] ]";
+ char *seperator = "] [ ";
+ int pending_entry_strlen = 12; //Including space after entry
+ int matrix_begin_strlen = 0;
+ int matrix_end_strlen = 0;
+ int seperator_strlen = 0;
+ int string_length = 0;
+ char *msg = "- Pending matrix: ";
+
+ /*
+ * for a list of lists of [ [ a b ] [ c d ] ]
+ * */
+
+ matrix_begin_strlen = strlen (matrix_begin);
+ matrix_end_strlen = strlen (matrix_end);
+ seperator_strlen = strlen (seperator);
+ string_length = matrix_begin_strlen + matrix_end_strlen
+ + (child_count -1) * seperator_strlen
+ + (child_count * child_count * pending_entry_strlen);
+
+ buf = GF_CALLOC (1, 1 + strlen (msg) + string_length , gf_afr_mt_char);
+ if (!buf)
+ goto out;
+
+ ptr = buf;
+ ptr += sprintf (ptr, "%s", msg);
+ ptr += sprintf (ptr, "%s", matrix_begin);
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
+ }
+ if (i < priv->child_count -1)
+ ptr += sprintf (ptr, "%s", seperator);
+ }
+
+ ptr += sprintf (ptr, "%s", matrix_end);
+
+out:
+ return buf;
+}
+
+void
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc)
+{
+ char *buf = NULL;
+ char *free_ptr = NULL;
+
+ buf = afr_get_pending_matrix_str (pending_matrix, this);
+ if (buf)
+ free_ptr = buf;
+ else
+ buf = "";
+
+
+ gf_log (this->name, GF_LOG_ERROR, "Unable to self-heal contents of '%s'"
+ " (possible split-brain). Please delete the file from all but "
+ "the preferred subvolume.%s", loc, buf);
+ GF_FREE (free_ptr);
+ return;
+}
+
+
void
afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
{
@@ -157,6 +274,7 @@ afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
int
afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
dict_t *xattr[], afr_transaction_type type,
size_t child_count)
{
@@ -167,12 +285,6 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
int i = 0;
int j = 0;
int k = 0;
- unsigned char *ignorant_subvols = NULL;
-
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), child_count,
- gf_afr_mt_char);
- if (NULL == ignorant_subvols)
- goto out;
afr_init_pending_matrix (pending_matrix, child_count);
@@ -190,7 +302,8 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
* subvolume.
*/
- ignorant_subvols[i] = 1;
+ if (ignorant_subvols)
+ ignorant_subvols[i] = 1;
continue;
}
@@ -201,19 +314,14 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
}
}
- afr_mark_ignorant_subvols_as_pending (pending_matrix,
- ignorant_subvols,
- child_count);
- GF_FREE (ignorant_subvols);
-out:
return ret;
}
typedef enum {
+ AFR_NODE_INVALID,
AFR_NODE_INNOCENT,
AFR_NODE_FOOL,
AFR_NODE_WISE,
- AFR_NODE_INVALID = -1,
} afr_node_type;
typedef struct {
@@ -293,7 +401,7 @@ afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
* It is 1 if no other wise node accuses it.
* Only wise nodes with wisdom 1 are sources.
*
- * If no nodes with wisdom 1 exist, a split-brain has occured.
+ * If no nodes with wisdom 1 exist, a split-brain has occurred.
*/
static void
@@ -393,6 +501,8 @@ afr_find_biggest_witness_among_fools (int32_t *witnesses,
{
int i = 0;
int biggest_witness = -1;
+ int biggest_witness_idx = -1;
+ int biggest_witness_cnt = -1;
GF_ASSERT (witnesses);
GF_ASSERT (characters);
@@ -402,10 +512,21 @@ afr_find_biggest_witness_among_fools (int32_t *witnesses,
if (characters[i].type != AFR_NODE_FOOL)
continue;
- if (biggest_witness < witnesses[i])
+ if (biggest_witness < witnesses[i]) {
biggest_witness = witnesses[i];
+ biggest_witness_idx = i;
+ biggest_witness_cnt = 1;
+ continue;
+ }
+
+ if (biggest_witness == witnesses[i])
+ biggest_witness_cnt++;
}
- return biggest_witness;
+
+ if (biggest_witness_cnt != 1)
+ return -1;
+
+ return biggest_witness_idx;
}
int
@@ -433,10 +554,84 @@ afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
return nsources;
}
+
+/* Set sources[idx] to 1 and return 1; return 0 if idx is out of range. */
+int
+afr_mark_fool_as_source_by_idx (int32_t *sources, int child_count, int idx)
+{
+        if (idx < 0 || idx >= child_count)
+                return 0;
+        sources[idx] = 1;
+        return 1;
+}
+
+
+/*
+ * Pick the child with the largest ia_size among the children listed in
+ * success_children (the list is read until a -1 entry or child_count
+ * entries, whichever comes first).
+ *
+ * Returns the index of that child. Returns -1 when no listed child has
+ * a non-zero size, or when more than one child was examined and all of
+ * them have the same size; in the latter case the source has to be
+ * found through the pending changelog instead.
+ */
+static int
+afr_find_largest_file_size (struct iatt *bufs, int32_t *success_children,
+                            int child_count)
+{
+        uint64_t largest  = 0;
+        uint64_t smallest = 0;
+        int      winner   = -1;
+        int      seen     = 0;
+        int      pos      = 0;
+        int      cur      = -1;
+
+        for (pos = 0; pos < child_count; pos++, seen++) {
+                cur = success_children[pos];
+                if (cur == -1)
+                        break;
+                if (bufs[cur].ia_size > largest) {
+                        largest = bufs[cur].ia_size;
+                        winner  = cur;
+                }
+                if (!seen || (bufs[cur].ia_size < smallest))
+                        smallest = bufs[cur].ia_size;
+        }
+
+        return ((seen > 1) && (smallest == largest)) ? -1 : winner;
+}
+
+
+/*
+ * Return the listed child (success_children is read until a -1 entry or
+ * child_count entries) with the greatest ia_ctime; the first one wins a
+ * tie. Returns -1 when no listed child has a non-zero ia_ctime.
+ */
+static int
+afr_find_newest_file (struct iatt *bufs, int32_t *success_children,
+                      int child_count)
+{
+        uint64_t latest = 0;
+        int      newest = -1;
+        int      pos    = 0;
+
+        for (pos = 0; (pos < child_count) && (success_children[pos] != -1);
+             pos++) {
+                if (bufs[success_children[pos]].ia_ctime <= latest)
+                        continue;
+                latest = bufs[success_children[pos]].ia_ctime;
+                newest = success_children[pos];
+        }
+        return newest;
+}
+
+
static int
afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
afr_node_character *characters,
- int child_count)
+ int32_t *success_children,
+ int child_count, struct iatt *bufs)
{
int32_t biggest_witness = 0;
int nsources = 0;
@@ -444,6 +639,11 @@ afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
GF_ASSERT (child_count > 0);
+ biggest_witness = afr_find_largest_file_size (bufs, success_children,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
gf_afr_mt_int32_t);
if (NULL == witnesses) {
@@ -456,34 +656,34 @@ afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
characters,
child_count);
- nsources = afr_mark_fool_as_source_by_witness (sources, witnesses,
- characters, child_count,
- biggest_witness);
+ if (biggest_witness != -1)
+ goto found;
+
+ biggest_witness = afr_find_newest_file (bufs, success_children,
+ child_count);
+
+found:
+ nsources = afr_mark_fool_as_source_by_idx (sources, child_count,
+ biggest_witness);
out:
- if (witnesses)
- GF_FREE (witnesses);
+ GF_FREE (witnesses);
return nsources;
}
int
afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
- int32_t *valid_children, int child_count,
- uint32_t uid)
+ int32_t *success_children,
+ unsigned int child_count, uint32_t uid)
{
int i = 0;
int nsources = 0;
int child = 0;
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (sources);
- GF_ASSERT (child_count > 0);
-
for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
+ if (-1 == success_children[i])
+ break;
- child = valid_children[i];
+ child = success_children[i];
if (uid == bufs[child].ia_uid) {
sources[child] = 1;
nsources++;
@@ -493,21 +693,17 @@ afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
}
int
-afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,
- int child_count)
+afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count)
{
int i = 0;
int smallest = -1;
int child = 0;
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (child_count > 0);
-
for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
- child = valid_children[i];
+ if (-1 == success_children[i])
+ break;
+ child = success_children[i];
if ((smallest == -1) ||
(bufs[child].ia_uid < bufs[smallest].ia_uid)) {
smallest = child;
@@ -517,25 +713,97 @@ afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,
}
static int
-afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *valid_children,
+afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
int child_count, int32_t *sources)
{
int nsources = 0;
int smallest = 0;
- smallest = afr_get_child_with_lowest_uid (bufs, valid_children,
+ smallest = afr_get_child_with_lowest_uid (bufs, success_children,
child_count);
if (smallest < 0) {
nsources = -1;
goto out;
}
nsources = afr_mark_child_as_source_by_uid (sources, bufs,
- valid_children, child_count,
+ success_children, child_count,
bufs[smallest].ia_uid);
out:
return nsources;
}
+/* Return the child with the largest ia_size among those listed in
+ * success_children (read until a negative entry or priv->child_count
+ * entries), earliest listed on a tie; returns -1 for an empty list.
+ */
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+                                 struct iatt *bufs)
+{
+        afr_private_t *priv = this->private;
+        int            best = -1;
+        int            pos  = 0;
+
+        for (pos = 0; pos < priv->child_count; pos++) {
+                if (success_children[pos] < 0)
+                        break;
+                if ((best < 0) ||
+                    (bufs[best].ia_size < bufs[success_children[pos]].ia_size))
+                        best = success_children[pos];
+        }
+        return best;
+}
+
+/*
+ * Among the children listed in success_children (read until a negative
+ * entry or child_count entries), treat zero-sized files as sinks and
+ * mark every non-zero-sized file in 'sources'.
+ *
+ * Returns the number of children marked when both kinds are present and
+ * all non-zero sizes match; 0, with 'sources' untouched, when only one
+ * kind is present; -1 on differing non-zero sizes or an empty list.
+ */
+int
+afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children,
+                                    int child_count, int32_t *sources)
+{
+        gf_boolean_t have_sink = _gf_false;
+        int          first_src = -1;
+        int          marked    = 0;
+        int          cur       = 0;
+        int          pos       = 0;
+
+        for (pos = 0; pos < child_count; pos++) {
+                cur = success_children[pos];
+                if (cur < 0)
+                        break;
+                if (!bufs[cur].ia_size)
+                        have_sink = _gf_true;
+                else if (first_src < 0)
+                        first_src = cur;
+                else if (bufs[first_src].ia_size != bufs[cur].ia_size)
+                        return -1;
+        }
+
+        /* Nothing was listed at all. */
+        if ((first_src < 0) && !have_sink)
+                return -1;
+        /* Only sinks or only sources: nothing to tell apart. */
+        if ((first_src < 0) || !have_sink)
+                return 0;
+
+        for (pos = 0; pos < child_count; pos++) {
+                cur = success_children[pos];
+                if (cur < 0)
+                        break;
+                if (bufs[cur].ia_size) {
+                        sources[cur] = 1;
+                        marked++;
+                }
+        }
+        return marked;
+}
+
char *
afr_get_character_str (afr_node_type type)
{
@@ -560,12 +828,10 @@ afr_get_character_str (afr_node_type type)
afr_node_type
afr_find_child_character_type (int32_t *pending_row, int32_t child,
- int32_t child_count, const char *xlator_name)
+ unsigned int child_count)
{
afr_node_type type = AFR_NODE_INVALID;
- GF_ASSERT (pending_row);
- GF_ASSERT (child_count > 0);
GF_ASSERT ((child >= 0) && (child < child_count));
if (afr_sh_is_innocent (pending_row, child_count))
@@ -574,44 +840,85 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child,
type = AFR_NODE_FOOL;
else if (afr_sh_is_wise (pending_row, child, child_count))
type = AFR_NODE_WISE;
- else
- GF_ASSERT (0);
-
- gf_log (xlator_name, GF_LOG_DEBUG, "child %d character %s",
- child, afr_get_character_str (type));
return type;
}
int
afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type)
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant)
{
afr_private_t *priv = NULL;
afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
int nsources = -1;
+ unsigned char *ignorant_subvols = NULL;
+ unsigned int child_count = 0;
priv = this->private;
+ child_count = priv->child_count;
if (afr_get_children_count (success_children, priv->child_count) == 0)
goto out;
+ if (!ignore_ignorant) {
+ ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
+ child_count, gf_afr_mt_char);
+ if (NULL == ignorant_subvols)
+ goto out;
+ }
+
afr_build_pending_matrix (priv->pending_key, pending_matrix,
- xattr, type, priv->child_count);
+ ignorant_subvols, xattr, type,
+ priv->child_count);
+ if (!ignore_ignorant)
+ afr_mark_ignorant_subvols_as_pending (pending_matrix,
+ ignorant_subvols,
+ priv->child_count);
sh_type = afr_self_heal_type_for_transaction (type);
if (AFR_SELF_HEAL_INVALID == sh_type)
goto out;
afr_sh_print_pending_matrix (pending_matrix, this);
- nsources = afr_mark_sources (sources, pending_matrix, bufs,
- priv->child_count, sh_type,
- success_children, this->name);
+ nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
+ sh_type, success_children, subvol_status);
out:
+ GF_FREE (ignorant_subvols);
return nsources;
}
+/* Store, for every child listed in success_children (read until a -1
+ * entry or child_count entries), the type computed by
+ * afr_find_child_character_type() in characters[child].type.
+ */
+void
+afr_find_character_types (afr_node_character *characters,
+                          int32_t **pending_matrix, int32_t *success_children,
+                          unsigned int child_count)
+{
+        int pos = 0;
+
+        for (pos = 0; (pos < child_count) && (success_children[pos] != -1);
+             pos++) {
+                int cur = success_children[pos];
+                characters[cur].type = afr_find_child_character_type (
+                                pending_matrix[cur], cur, child_count);
+        }
+}
+
+/* Set sources[c] to 1 for each child c listed in success_children; the
+ * list is read until a -1 entry or child_count entries. */
+void
+afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
+                                   unsigned int child_count)
+{
+        int pos = 0;
+
+        for (pos = 0; pos < child_count && success_children[pos] != -1; pos++)
+                sources[success_children[pos]] = 1;
+}
/**
* mark_sources: Mark all 'source' nodes and return number of source
* nodes found
@@ -637,17 +944,18 @@ out:
*/
int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name)
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status)
{
/* stores the 'characters' (innocent, fool, wise) of the nodes */
-
afr_node_character *characters = NULL;
- int i = 0;
- int nsources = -1;
- xlator_t *this = NULL;
+ int nsources = -1;
+ unsigned int child_count = 0;
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ child_count = priv->child_count;
characters = GF_CALLOC (sizeof (afr_node_character),
child_count, gf_afr_mt_afr_node_character);
if (!characters)
@@ -656,28 +964,29 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
this = THIS;
/* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
- }
-
+ memset (sources, 0, sizeof (*sources) * child_count);
nsources = 0;
- for (i = 0; i < child_count; i++) {
- characters[i].type =
- afr_find_child_character_type (pending_matrix[i], i,
- child_count,
- xlator_name);
- if (AFR_NODE_INVALID == characters[i].type)
- gf_log (xlator_name, GF_LOG_WARNING,
- "child %d had invalid xattrs", i);
- }
-
- if ((type == AFR_SELF_HEAL_METADATA)
- && afr_sh_all_nodes_innocent (characters, child_count)) {
-
- nsources = afr_sh_mark_lowest_uid_as_source (bufs,
- valid_children,
+ afr_find_character_types (characters, pending_matrix, success_children,
+ child_count);
+ if (afr_sh_all_nodes_innocent (characters, child_count)) {
+ switch (type) {
+ case AFR_SELF_HEAL_METADATA:
+ nsources = afr_sh_mark_lowest_uid_as_source (bufs,
+ success_children,
+ child_count,
+ sources);
+ break;
+ case AFR_SELF_HEAL_DATA:
+ nsources = afr_sh_mark_zero_size_file_as_sink (bufs,
+ success_children,
child_count,
sources);
+ if ((nsources < 0) && subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
+ break;
+ default:
+ break;
+ }
goto out;
}
@@ -685,32 +994,29 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
afr_sh_compute_wisdom (pending_matrix, characters, child_count);
if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
- gf_log (this->name, GF_LOG_INFO,
- "split-brain possible, no source detected");
+ if (subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
nsources = -1;
-
} else {
nsources = afr_sh_mark_wisest_as_sources (sources,
characters,
child_count);
}
} else {
+ if (subvol_status)
+ *subvol_status |= ALL_FOOLS;
nsources = afr_mark_biggest_of_fools_as_source (sources,
pending_matrix,
characters,
- child_count);
+ success_children,
+ child_count, bufs);
}
out:
- if (nsources == 0) {
- for (i = 0; i < child_count; i++) {
- if (valid_children[i] != -1)
- sources[valid_children[i]] = 1;
- }
- }
- if (characters)
- GF_FREE (characters);
+ if (nsources == 0)
+ afr_mark_success_children_sources (sources, success_children,
+ child_count);
+ GF_FREE (characters);
gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
return nsources;
@@ -718,84 +1024,111 @@ out:
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = 0;
- int i = 0;
- int j = 0;
- int k = 0;
+ int tgt = 0;
+ int src = 0;
+ int value = 0;
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- delta_matrix[i][j] = 0;
- }
- }
-
- for (i = 0; i < child_count; i++) {
- if (pending_raw)
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- &pending_raw);
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Unable to get dict value.");
- if (!success[j])
- continue;
-
- k = afr_index_for_transaction_type (type);
+ afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
+ xattr, type, priv->child_count);
- if (pending_raw != NULL) {
- memcpy (pending, pending_raw, sizeof(pending));
- delta_matrix[i][j] = -(ntoh32 (pending[k]));
- } else {
- delta_matrix[i][j] = 0;
+ /*
+ * The algorithm here has two parts. First, for each subvol indexed
+ * as tgt, we try to figure out what count everyone should have for it.
+ * If the self-heal succeeded, that's easy; the value is zero.
+ * Otherwise, the value is the maximum of the succeeding nodes' counts.
+ * Once we know the value, we loop through (possibly for a second time)
+ * setting each count to the difference so that when we're done all
+ * succeeding nodes will have the same count for tgt.
+ */
+ for (tgt = 0; tgt < priv->child_count; ++tgt) {
+ value = 0;
+ if (!success[tgt]) {
+ /* Find the maximum. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (!success[src]) {
+ continue;
+ }
+ if (delta_matrix[src][tgt] > value) {
+ value = delta_matrix[src][tgt];
+ }
+ }
+ }
+ /* Force everyone who succeeded to the chosen value. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (success[src]) {
+ delta_matrix[src][tgt] = value
+ - delta_matrix[src][tgt];
+ }
+ else {
+ delta_matrix[src][tgt] = 0;
}
-
}
}
}
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
- int k = 0;
- int ret = 0;
- int32_t *pending = NULL;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int ret = 0;
+ int32_t *pending = NULL;
+ int32_t *local_pending = NULL;
+ afr_private_t *priv = NULL;
+ priv = this->private;
for (i = 0; i < child_count; i++) {
if (!xattr[i])
continue;
+ local_pending = NULL;
for (j = 0; j < child_count; j++) {
pending = GF_CALLOC (sizeof (int32_t), 3,
gf_afr_mt_int32_t);
- if (!pending)
+ if (!pending) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate pending entry "
+ "for %s[%d] on %s",
+ priv->pending_key[j], type,
+ priv->children[i]->name);
continue;
+ }
/* 3 = data+metadata+entry */
k = afr_index_for_transaction_type (type);
pending[k] = hton32 (delta_matrix[i][j]);
+ if (j == i) {
+ local_pending = pending;
+ continue;
+ }
ret = dict_set_bin (xattr[i], priv->pending_key[j],
pending,
- 3 * sizeof (int32_t));
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_WARNING,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
"Unable to set dict value.");
+ GF_FREE (pending);
+ }
+ }
+ if (local_pending) {
+ ret = dict_set_bin (xattr[i], priv->pending_key[i],
+ local_pending,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value.");
+ GF_FREE (local_pending);
+ }
}
}
return 0;
@@ -803,146 +1136,23 @@ afr_sh_delta_to_xattr (afr_private_t *priv,
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
-
- if (pending[j])
- return 1;
- }
-
- return 0;
-}
-
-
-int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
-
- if (pending[j])
- return 1;
- }
-
- return 0;
-}
-
-
-int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
-
- if (pending[j])
- return 1;
- }
-
- return 0;
-}
-
-
-/**
- * is_matrix_zero - return true if pending matrix is all zeroes
- */
-
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count)
-{
- int i = 0;
- int j = 0;
-
- for (i = 0; i < child_count; i++)
- for (j = 0; j < child_count; j++)
- if (pending_matrix[i][j])
- return 0;
- return 1;
-}
-
-
-int
afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- sh->locked_nodes[i] = 0;
- }
+ afr_sh_reset (frame, this);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda || sh->op_failed) {
- gf_log (this->name, GF_LOG_INFO,
+ if (local->unhealable) {
+ gf_log (this->name, GF_LOG_DEBUG,
"split brain found, aborting selfheal of %s",
local->loc.path);
- sh->op_failed = 1;
+ }
+
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh->completion_cbk (frame, this);
} else {
gf_log (this->name, GF_LOG_TRACE,
@@ -970,12 +1180,44 @@ afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
return 0;
}
-static void
+/* Allocate sh's per-child arrays: buf, parentbufs, child_errno,
+ * success_children, fresh_children and xattr, in that order. Returns 0,
+ * or -ENOMEM at the first failed allocation (earlier ones are kept).
+ */
+int
+afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
+{
+        sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf), gf_afr_mt_iatt);
+        if (sh->buf == NULL)
+                return -ENOMEM;
+        sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
+                                    gf_afr_mt_iatt);
+        if (sh->parentbufs == NULL)
+                return -ENOMEM;
+        sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
+                                     gf_afr_mt_int);
+        if (sh->child_errno == NULL)
+                return -ENOMEM;
+        sh->success_children = afr_children_create (child_count);
+        if (sh->success_children == NULL)
+                return -ENOMEM;
+        sh->fresh_children = afr_children_create (child_count);
+        if (sh->fresh_children == NULL)
+                return -ENOMEM;
+        sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
+                               gf_afr_mt_dict_t);
+        if (sh->xattr == NULL)
+                return -ENOMEM;
+        return 0;
+}
+
+void
afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc)
{
int child_index = 0;
afr_local_t *local = NULL;
@@ -991,15 +1233,13 @@ afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
{
if (op_ret == 0) {
sh->buf[child_index] = *buf;
- sh->parentbuf = *postparent;
sh->parentbufs[child_index] = *postparent;
sh->success_children[sh->success_count] = child_index;
sh->success_count++;
sh->xattr[child_index] = dict_ref (xattr);
} else {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
+ gf_log (this->name, GF_LOG_DEBUG, "path %s on subvolume"
+ " %s => -1 (%s)", loc->path,
priv->children[child_index]->name,
strerror (op_errno));
local->self_heal.child_errno[child_index] = op_errno;
@@ -1027,64 +1267,140 @@ afr_valid_ia_type (ia_type_t ia_type)
return _gf_false;
}
+/* Create a copy of 'frame' set up for an impunge: it gets a new
+ * afr_local_t whose child_up is duplicated from frame->local, a pending
+ * matrix, and the arrays allocated by afr_sh_common_create(). Returns 0
+ * and stores the copy in *impunge_frame on success; on failure destroys
+ * the copy (if one was made) and returns a negative errno.
+ */
+int
+afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
+                          int active_source, call_frame_t **impunge_frame)
+{
+        afr_private_t   *priv          = this->private;
+        call_frame_t    *new_frame     = NULL;
+        afr_local_t     *local         = NULL;
+        afr_local_t     *impunge_local = NULL;
+        afr_self_heal_t *impunge_sh    = NULL;
+        int32_t          op_errno      = ENOMEM;
+        int              ret           = 0;
+
+        new_frame = copy_frame (frame);
+        if (!new_frame)
+                goto out;
+
+        AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out);
+        local                     = frame->local;
+        new_frame->local          = impunge_local;
+        impunge_sh                = &impunge_local->self_heal;
+        impunge_sh->sh_frame      = frame;
+        impunge_sh->active_source = active_source;
+        impunge_local->child_up = memdup (local->child_up,
+                                          sizeof (*local->child_up) *
+                                          priv->child_count);
+        if (impunge_local->child_up == NULL)
+                goto out;
+
+        impunge_local->pending = afr_matrix_create (priv->child_count,
+                                                    AFR_NUM_CHANGE_LOGS);
+        if (impunge_local->pending == NULL)
+                goto out;
+
+        /* ret is 0 or a negative errno, so -ret is the final op_errno. */
+        ret      = afr_sh_common_create (impunge_sh, priv->child_count);
+        op_errno = -ret;
+        if (op_errno == 0)
+                *impunge_frame = new_frame;
+out:
+        if (op_errno && new_frame)
+                AFR_STACK_DESTROY (new_frame);
+        return -op_errno;
+}
+
void
-afr_sh_call_entry_impunge_recreate (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent,
- afr_impunge_done_cbk_t impunge_done)
+afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this,
+ struct iatt *buf,
+ struct iatt *postparent,
+ afr_impunge_done_cbk_t impunge_done)
{
call_frame_t *impunge_frame = NULL;
afr_local_t *local = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *sh = NULL;
afr_self_heal_t *impunge_sh = NULL;
+ int ret = 0;
+ unsigned int enoent_count = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
int32_t op_errno = 0;
- impunge_frame = copy_frame (frame);
- if (!impunge_frame) {
- op_errno = ENOMEM;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (!enoent_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no missing files - %s. proceeding to metadata check",
+ local->loc.path);
goto out;
}
-
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
-
- local = frame->local;
- sh = &local->self_heal;
- impunge_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = sh->source;
- impunge_sh->impunging_entry_mode = st_mode_from_ia (buf->ia_prot,
- buf->ia_type);
- impunge_sh->impunge_ret_child = child_index;
- loc_copy (&impunge_local->loc, &local->loc);
sh->impunge_done = impunge_done;
- impunge_local->call_count = 1;
- afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf,
- postparent);
+ ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame);
+ if (ret)
+ goto out;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc_copy (&impunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&impunge_sh->parent_loc,
+ &impunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
+ impunge_local->call_count = enoent_count;
+ impunge_sh->entrybuf = sh->buf[sh->source];
+ impunge_sh->parentbuf = sh->parentbufs[sh->source];
+ for (i = 0; i < priv->child_count; i++) {
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (sh->child_errno[i] != ENOENT) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->child_errno[i] != ENOENT)
+ continue;
+ afr_sh_entry_impunge_create (impunge_frame, this, i);
+ enoent_count--;
+ }
+ GF_ASSERT (!enoent_count);
return;
out:
- gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, reason: %s",
- local->loc.path, strerror (op_errno));
- impunge_done (frame, this, child_index, -1, op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
+ "reason: %s", local->loc.path, strerror (-ret));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ afr_sh_missing_entries_finish (frame, this);
}
int
-afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
+afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
int32_t op_ret, int32_t op_errno)
{
- int call_count = 0;
afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "create entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_missing_entries_finish (frame, this);
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
return 0;
}
@@ -1094,26 +1410,11 @@ sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
int type = 0;
- afr_private_t *priv = NULL;
- int enoent_count = 0;
- int i = 0;
struct iatt *buf = NULL;
struct iatt *postparent = NULL;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- afr_sh_missing_entries_finish (frame, this);
- return 0;
- }
buf = &sh->buf[sh->source];
postparent = &sh->parentbufs[sh->source];
@@ -1122,72 +1423,80 @@ sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
if (!afr_valid_ia_type (type)) {
gf_log (this->name, GF_LOG_ERROR,
"%s: unknown file type: 0%o", local->loc.path, type);
- local->govinda_gOvinda = 1;
+ afr_set_local_for_unhealable (local);
afr_sh_missing_entries_finish (frame, this);
goto out;
}
- local->call_count = enoent_count;
- for (i = 0; i < priv->child_count; i++) {
- //If !child_up errno will be zero
- if (sh->child_errno[i] != ENOENT)
- continue;
- afr_sh_call_entry_impunge_recreate (frame, this, i,
+ afr_sh_missing_entry_call_impunge_recreate (frame, this,
buf, postparent,
afr_sh_create_entry_cbk);
- enoent_count--;
- }
- GF_ASSERT (enoent_count == 0);
out:
return 0;
}
void
-afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this)
+afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int32_t op_errno = 0;
ia_type_t ia_type = IA_INVAL;
int32_t nsources = 0;
+ loc_t *loc = NULL;
+ int32_t subvol_status = 0;
+ afr_transaction_type txn_type = AFR_DATA_TRANSACTION;
+ gf_boolean_t split_brain = _gf_false;
+ int read_child = -1;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
+ loc = &local->loc;
- if (afr_get_children_count (sh->success_children,
- priv->child_count) == 0) {
- op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
- priv->child_count);
- goto out;
- }
-
- if (afr_gfid_missing_count (this->name, sh->success_children,
- sh->buf, priv->child_count,
- local->loc.path) ||
- afr_conflicting_iattrs (sh->buf, sh->success_children,
- priv->child_count, local->loc.path,
- this->name)) {
- //this can happen if finding the fresh parent dir failed
- local->govinda_gOvinda = 1;
- sh->op_failed = 1;
- op_errno = EIO;
+ if (op_ret < 0) {
+ if (op_errno == EIO) {
+ afr_set_local_for_unhealable (local);
+ }
+ // EIO can happen if finding the fresh parent dir failed
goto out;
}
//now No chance for the ia_type to conflict
ia_type = sh->buf[sh->success_children[0]].ia_type;
+ txn_type = afr_transaction_type_get (ia_type);
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
- sh->success_children,
- afr_transaction_type_get (ia_type));
+ sh->success_children, txn_type,
+ &subvol_status, _gf_false);
if (nsources < 0) {
gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
" in missing entry self-heal, continuing with the rest"
" of the self-heals", local->loc.path);
- op_errno = EIO;
- goto out;
+ if (subvol_status & SPLIT_BRAIN) {
+ split_brain = _gf_true;
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ nsources = 1;
+ sh->sources[sh->success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child
+ (this,
+ sh->success_children,
+ sh->buf);
+ sh->sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ op_errno = EIO;
+ goto out;
+ }
+ } else {
+ op_errno = EIO;
+ goto out;
+ }
}
afr_get_fresh_children (sh->success_children, sh->sources,
@@ -1201,38 +1510,78 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this)
if (sh->gfid_sh_success_cbk)
sh->gfid_sh_success_cbk (frame, this);
- sh_missing_entries_create (frame, this);
+ sh->type = sh->buf[sh->source].ia_type;
+ if (uuid_is_null (loc->inode->gfid))
+ uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
+ if (split_brain) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ sh_missing_entries_create (frame, this);
+ }
return;
out:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
}
static int
-afr_sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
{
int call_count = 0;
afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ sh = &local->self_heal;
+ priv = this->private;
afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
op_errno, inode, buf, xattr,
- postparent);
+ postparent, &sh->lookup_loc);
call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_missing_entries_lookup_done (frame, this);
+ if (call_count)
+ goto out;
+ op_ret = -1;
+ if (!sh->success_count) {
+ op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
+ priv->child_count);
+ gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
+ "reason %s", sh->lookup_loc.path,
+ strerror (op_errno));
+ goto done;
+ }
+
+ if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
+ (afr_conflicting_iattrs (sh->buf, sh->success_children,
+ priv->child_count,
+ sh->lookup_loc.path, this->name))) {
+ op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
+ "for %s", sh->lookup_loc.path);
+ goto done;
+ }
+
+ if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
+ (afr_gfid_missing_count (this->name, sh->success_children,
+ sh->buf, priv->child_count,
+ sh->lookup_loc.path))) {
+ op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
+ "for %s", sh->lookup_loc.path);
+ goto done;
+ }
+ op_ret = 0;
+done:
+ sh->lookup_done (frame, this, op_ret, op_errno);
+out:
return 0;
}
@@ -1255,7 +1604,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
LOCK (&frame->lock);
{
afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
}
@@ -1268,6 +1617,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
void
afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
int child_index, struct iatt *buf,
+ struct iatt *parentbuf,
afr_expunge_done_cbk_t expunge_done)
{
call_frame_t *expunge_frame = NULL;
@@ -1276,13 +1626,14 @@ afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
afr_self_heal_t *sh = NULL;
afr_self_heal_t *expunge_sh = NULL;
int32_t op_errno = 0;
+ int ret = 0;
expunge_frame = copy_frame (frame);
if (!expunge_frame) {
goto out;
}
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
local = frame->local;
sh = &local->self_heal;
@@ -1290,8 +1641,15 @@ afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
expunge_sh = &expunge_local->self_heal;
expunge_sh->sh_frame = frame;
loc_copy (&expunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&expunge_sh->parent_loc,
+ &expunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
sh->expunge_done = expunge_done;
- afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf);
+ afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf,
+ parentbuf);
return;
out:
gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
@@ -1328,15 +1686,18 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_missing_entries_finish (frame, this);
} else {
if (afr_gfid_missing_count (this->name, sh->fresh_children,
sh->buf, priv->child_count,
local->loc.path)) {
afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_cbk,
- _gf_true);
+ afr_sh_missing_entries_lookup_done,
+ sh->sh_gfid_req,
+ AFR_LOOKUP_FAIL_CONFLICTS|
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
} else {
//No need to set gfid so goto missing entries lookup done
//Behave as if you have done the lookup
@@ -1347,7 +1708,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
afr_children_copy (sh->success_children,
sh->fresh_children,
priv->child_count);
- afr_sh_missing_entries_lookup_done (frame, this);
+ afr_sh_missing_entries_lookup_done (frame, this, 0, 0);
}
}
return 0;
@@ -1417,8 +1778,11 @@ afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
for (i = 0; i < priv->child_count; i++) {
if (!purge_condition (local, priv, i))
continue;
+ gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
+ "on %s", local->loc.path, priv->children[i]->name);
afr_sh_call_entry_expunge_remove (frame, this,
(long) i, &sh->buf[i],
+ &sh->parentbufs[i],
afr_sh_remove_entry_cbk);
}
out:
@@ -1468,8 +1832,8 @@ afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
sh->entrybuf.ia_gfid)))
continue;
- afr_fresh_children_add_child (sh->fresh_children,
- i, priv->child_count);
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
}
afr_sh_purge_entry_common (frame, this,
@@ -1500,35 +1864,34 @@ afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
}
void
-afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this)
+afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh,
+ unsigned int child_count)
+{
+ afr_children_intersection_get (sh->success_children,
+ sh->fresh_parent_dirs,
+ sh->sources, child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, child_count);
+ memset (sh->sources, 0, sizeof (*sh->sources) * child_count);
+}
+
+void
+afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int32_t fresh_child_enoents = 0;
int32_t fresh_parent_count = 0;
- int32_t op_errno = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- if (afr_get_children_count (sh->success_children,
- priv->child_count) == 0) {
- op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
- priv->child_count);
+ if (op_ret < 0)
goto fail;
- }
-
- //make intersection of (success_children & fresh_parent_dirs) fresh_children
- //the other success_children will be added to it if they are not stale
- afr_children_intersection_get (sh->success_children,
- sh->fresh_parent_dirs,
- sh->sources, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- memset (sh->sources, 0, sizeof (*sh->sources) * priv->child_count);
-
+ afr_get_children_of_fresh_parent_dirs (sh, priv->child_count);
fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
priv->child_count);
//we need the enoent count of the subvols present in fresh_parent_dirs
@@ -1537,7 +1900,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this)
priv->child_count, ENOENT);
if (fresh_child_enoents == fresh_parent_count) {
afr_sh_set_error (sh, ENOENT);
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_purge_entry (frame, this);
} else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
priv->child_count, local->loc.path,
@@ -1551,39 +1914,22 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this)
afr_sh_purge_stale_entry (frame, this);
} else {
op_errno = EIO;
- local->govinda_gOvinda = 1;
+ afr_set_local_for_unhealable (local);
goto fail;
}
return;
fail:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
}
-static int
-afr_sh_children_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- int call_count = 0;
-
- afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
- op_errno, inode, buf, xattr,
- postparent);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_children_lookup_done (frame, this);
-
- return 0;
-}
-
-static int
-afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this)
+static void
+afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
@@ -1591,54 +1937,42 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this)
int enoent_count = 0;
int nsources = 0;
int source = -1;
+ int32_t subvol_status = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- /* If We can't find a fresh parent directory here,
- * we wont know which subvol is correct without finding a parent dir
- * upwards which has correct xattrs, for that we may have to
- * do lookups till root, we dont wanna do that,
- * instead make sure that if there are conflicting gfid
- * parent dirs, self-heal thus lookup is failed with EIO.
- * if there are missing entries we dont know whether to delete or
- * create so fail with EIO,
- * If there are conflicting xattr fail with EIO.
- */
- if (afr_get_children_count (sh->success_children,
- priv->child_count) == 0) {
- gf_log (this->name, GF_LOG_ERROR, "Parent dir lookup failed "
- "for %s, in missing entry self-heal, continuing with "
- "the rest of the self-heals", local->loc.path);
+ if (op_ret < 0)
goto out;
- }
-
enoent_count = afr_errno_count (NULL, sh->child_errno,
priv->child_count, ENOENT);
if (enoent_count > 0) {
gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s,"
- " in missing entry self-heal, continuing with the rest"
- " of the self-heals", local->loc.path);
- goto out;
- }
-
- if (afr_conflicting_iattrs (sh->buf, sh->success_children,
- priv->child_count, sh->parent_loc.path,
- this->name)) {
- gf_log (this->name, GF_LOG_INFO, "conflicting stat info for "
- "parent dirs of %s", local->loc.path);
- goto out;
+ " in missing entry self-heal, aborting missing-entry "
+ "self-heal",
+ local->loc.path);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->success_children,
- AFR_ENTRY_TRANSACTION);
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
- " in missing entry self-heal, continuing with the rest"
- " of the self-heals", local->loc.path);
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", sh->parent_loc.path);
+ afr_mark_success_children_sources (sh->sources,
+ sh->success_children,
+ priv->child_count);
+ } else if (nsources < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
+ "of %s, in missing entry self-heal, aborting "
+ "self-heal", local->loc.path);
+ op_errno = EIO;
goto out;
}
@@ -1646,39 +1980,21 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this)
if (source == -1) {
GF_ASSERT (0);
gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
goto out;
}
afr_get_fresh_children (sh->success_children, sh->sources,
sh->fresh_parent_dirs, priv->child_count);
afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_children_lookup_cbk, _gf_false);
- return 0;
+ afr_sh_children_lookup_done, NULL, 0,
+ NULL);
+ return;
out:
- afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
- afr_sh_missing_entries_finish (frame, this);
- return 0;
-}
-
-int
-afr_sh_conflicting_entry_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
-{
- int call_count = 0;
-
- afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
- op_errno, inode, buf, xattr,
- postparent);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_find_fresh_parents (frame, this);
-
- return 0;
+ afr_sh_set_error (sh, op_errno);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
void
@@ -1696,6 +2012,7 @@ afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
afr_reset_children (sh->success_children, child_count);
afr_reset_children (sh->fresh_children, child_count);
afr_reset_xattr (sh->xattr, child_count);
+ loc_wipe (&sh->lookup_loc);
}
/* afr self-heal state will be lost if this call is made
@@ -1703,7 +2020,8 @@ afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
*/
int
afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid)
+ afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
+ int32_t flags, dict_t *xdata)
{
afr_local_t *local = NULL;
int i = 0;
@@ -1716,8 +2034,7 @@ afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
priv = this->private;
sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
@@ -1725,24 +2042,27 @@ afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (xattr_req) {
afr_xattr_req_prepare (this, xattr_req, loc->path);
- if (set_gfid) {
+ if (gfid) {
gf_log (this->name, GF_LOG_DEBUG,
"looking up %s with gfid: %s",
- local->loc.path, uuid_utoa (sh->sh_gfid_req));
- GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
- afr_set_dict_gfid (xattr_req, sh->sh_gfid_req);
+ loc->path, uuid_utoa (gfid));
+ GF_ASSERT (!uuid_is_null (gfid));
+ afr_set_dict_gfid (xattr_req, gfid);
}
}
afr_sh_common_reset (sh, priv->child_count);
+ sh->lookup_done = lookup_done;
+ loc_copy (&sh->lookup_loc, loc);
+ sh->lookup_flags = flags;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
gf_log (this->name, GF_LOG_DEBUG,
"looking up %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
+ loc->path, priv->children[i]->name);
STACK_WIND_COOKIE (frame,
- lookup_cbk,
+ afr_sh_common_lookup_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->lookup,
@@ -1762,7 +2082,8 @@ afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
int
-afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
+afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
+ xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -1775,38 +2096,16 @@ afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_INFO,
"Non blocking entrylks failed.");
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_done (frame, this);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"Non blocking entrylks done. Proceeding to FOP");
afr_sh_common_lookup (frame, this, &sh->parent_loc,
- afr_sh_conflicting_entry_lookup_cbk,
- _gf_false);
- }
-
- return 0;
-}
-
-int
-afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- afr_sh_missing_entries_done (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_cbk,
- _gf_true);
+ afr_sh_find_fresh_parents,
+ NULL, AFR_LOOKUP_FAIL_CONFLICTS,
+ NULL);
}
return 0;
@@ -1818,11 +2117,11 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- sh = &local->self_heal;
int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
@@ -1832,7 +2131,12 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
int_lock->lk_basename = base_name;
int_lock->lk_loc = loc;
int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = this->name;
+ int_lock->lockee_count = 0;
+ afr_init_entry_lockee (&int_lock->lockee[0], local, loc,
+ base_name, priv->child_count);
+ int_lock->lockee_count++;
afr_nonblocking_entrylk (frame, this);
return 0;
@@ -1842,57 +2146,65 @@ static int
afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
afr_lock_cbk_t lock_cbk)
{
- afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
local = frame->local;
- int_lock = &local->internal_lock;
sh = &local->self_heal;
- priv = this->private;
gf_log (this->name, GF_LOG_TRACE,
"attempting to recreate missing entries for path=%s",
local->loc.path);
- GF_ASSERT (local->loc.parent);
- afr_build_parent_loc (&sh->parent_loc, &local->loc);
+ ret = afr_build_parent_loc (&sh->parent_loc, &local->loc, &op_errno);
+ if (ret)
+ goto out;
afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
lock_cbk);
return 0;
-}
-
-static int
-afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_conflicting_sh_cbk);
+out:
+ int_lock = &local->internal_lock;
+ int_lock->lock_op_ret = -1;
+ lock_cbk (frame, this);
return 0;
}
static int
-afr_self_heal_gfids (call_frame_t *frame, xlator_t *this)
+afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+
afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_gfid_sh_cbk);
+ afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
return 0;
}
-afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+afr_local_t*
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *lc = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *shc = NULL;
+ int ret = 0;
priv = this->private;
sh = &l->self_heal;
- lc = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
+ lc = mem_get0 (this->local_pool);
if (!lc)
goto out;
@@ -1900,26 +2212,32 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
shc->unwind = sh->unwind;
shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
- shc->need_missing_entry_self_heal = sh->need_missing_entry_self_heal;
- shc->need_gfid_self_heal = sh->need_gfid_self_heal;
- shc->need_data_self_heal = sh->need_data_self_heal;
- shc->need_metadata_self_heal = sh->need_metadata_self_heal;
- shc->need_entry_self_heal = sh->need_entry_self_heal;
+ shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal;
+ shc->do_gfid_self_heal = sh->do_gfid_self_heal;
+ shc->do_data_self_heal = sh->do_data_self_heal;
+ shc->do_metadata_self_heal = sh->do_metadata_self_heal;
+ shc->do_entry_self_heal = sh->do_entry_self_heal;
+ shc->force_confirm_spb = sh->force_confirm_spb;
shc->forced_merge = sh->forced_merge;
- shc->healing_fd_opened = sh->healing_fd_opened;
- shc->data_lock_held = sh->data_lock_held;
- if (sh->healing_fd && !sh->healing_fd_opened)
- shc->healing_fd = fd_ref (sh->healing_fd);
- else
- shc->healing_fd = sh->healing_fd;
shc->background = sh->background;
shc->type = sh->type;
+ shc->data_sh_info = "";
+ shc->metadata_sh_info = "";
uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
+ if (l->loc.path) {
+ ret = loc_copy (&lc->loc, &l->loc);
+ if (ret < 0)
+ goto out;
+ }
+
+ lc->child_up = memdup (l->child_up,
+ sizeof (*lc->child_up) * priv->child_count);
+ if (!lc->child_up) {
+ ret = -1;
+ goto out;
+ }
- lc->child_up = memdup (l->child_up, priv->child_count);
if (l->xattr_req)
lc->xattr_req = dict_ref (l->xattr_req);
@@ -1927,40 +2245,25 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
if (l->cont.lookup.xattr)
lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (l->internal_lock.inode_locked_nodes)
- lc->internal_lock.inode_locked_nodes =
- memdup (l->internal_lock.inode_locked_nodes,
- priv->child_count);
- else
- lc->internal_lock.inode_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.entry_locked_nodes)
- lc->internal_lock.entry_locked_nodes =
- memdup (l->internal_lock.entry_locked_nodes,
- priv->child_count);
- else
- lc->internal_lock.entry_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.locked_nodes)
- lc->internal_lock.locked_nodes =
- memdup (l->internal_lock.locked_nodes,
- priv->child_count);
- else
- lc->internal_lock.locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- lc->internal_lock.inodelk_lock_count =
- l->internal_lock.inodelk_lock_count;
- lc->internal_lock.entrylk_lock_count =
- l->internal_lock.entrylk_lock_count;
+ lc->internal_lock.locked_nodes =
+ GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
+ priv->child_count, gf_afr_mt_char);
+ if (!lc->internal_lock.locked_nodes) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = afr_inodelk_init (&lc->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret)
+ goto out;
out:
+ if (ret) {
+ afr_local_cleanup (lc, this);
+ lc = NULL;
+ }
return lc;
}
@@ -1970,32 +2273,39 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
afr_self_heal_t * sh = NULL;
+ afr_local_t * orig_frame_local = NULL;
+ afr_self_heal_t * orig_frame_sh = NULL;
char sh_type_str[256] = {0,};
- gf_boolean_t split_brain = _gf_false;
+ gf_loglevel_t loglevel = 0;
priv = this->private;
local = bgsh_frame->local;
sh = &local->self_heal;
- if (local->govinda_gOvinda)
- split_brain = _gf_true;
-
- afr_set_split_brain (this, sh->inode, split_brain);
+ if (local->unhealable) {
+ afr_set_split_brain (this, sh->inode, SPB, SPB);
+ }
afr_self_heal_type_str_get (sh, sh_type_str,
sizeof(sh_type_str));
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_ERROR, "background %s self-heal "
- "failed on %s", sh_type_str, local->loc.path);
+ if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
+ loglevel = GF_LOG_ERROR;
+ } else if (!is_self_heal_failed (sh, AFR_CHECK_ALL)) {
+ loglevel = GF_LOG_INFO;
} else {
- gf_log (this->name, GF_LOG_INFO, "background %s self-heal "
- "completed on %s", sh_type_str, local->loc.path);
+ loglevel = GF_LOG_DEBUG;
}
+ afr_log_self_heal_completion_status (local, loglevel);
+
FRAME_SU_UNDO (bgsh_frame, afr_local_t);
- if (!sh->unwound) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
+ if (!sh->unwound && sh->unwind) {
+ orig_frame_local = sh->orig_frame->local;
+ orig_frame_sh = &orig_frame_local->self_heal;
+ orig_frame_sh->actual_sh_started = _gf_true;
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_ALL));
}
if (sh->background) {
@@ -2017,102 +2327,125 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int i = 0;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+ afr_self_heal_t *orig_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ loc_t *loc = NULL;
local = frame->local;
+ orig_sh = &local->self_heal;
priv = this->private;
GF_ASSERT (local->loc.path);
- if (local->self_heal.background) {
- LOCK (&priv->lock);
- {
- if (priv->background_self_heals_started
- < priv->background_self_heal_count) {
- priv->background_self_heals_started++;
-
-
- } else {
- local->self_heal.background = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
- }
-
gf_log (this->name, GF_LOG_TRACE,
"performing self heal on %s (metadata=%d data=%d entry=%d)",
local->loc.path,
- local->self_heal.need_metadata_self_heal,
- local->self_heal.need_data_self_heal,
- local->self_heal.need_entry_self_heal);
+ local->self_heal.do_metadata_self_heal,
+ local->self_heal.do_data_self_heal,
+ local->self_heal.do_entry_self_heal);
+ op_errno = ENOMEM;
sh_frame = copy_frame (frame);
- afr_set_lk_owner (sh_frame, this);
+ if (!sh_frame)
+ goto out;
+ afr_set_lk_owner (sh_frame, this, sh_frame->root);
+ afr_set_low_priority (sh_frame);
- sh_local = afr_local_copy (local, this);
+ sh_local = afr_self_heal_local_init (local, this);
+ if (!sh_local)
+ goto out;
sh_frame->local = sh_local;
sh = &sh_local->self_heal;
sh->inode = inode_ref (inode);
-
sh->orig_frame = frame;
sh->completion_cbk = afr_self_heal_completion_cbk;
- sh->buf = GF_CALLOC (priv->child_count, sizeof (struct iatt),
- gf_afr_mt_iatt);
- sh->parentbufs = GF_CALLOC (priv->child_count, sizeof (struct iatt),
- gf_afr_mt_iatt);
- sh->child_errno = GF_CALLOC (priv->child_count, sizeof (int),
- gf_afr_mt_int);
- sh->success = GF_CALLOC (priv->child_count, sizeof (int),
- gf_afr_mt_int);
- sh->xattr = GF_CALLOC (priv->child_count, sizeof (dict_t *),
- gf_afr_mt_dict_t);
+ sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
+ gf_afr_mt_char);
+ if (!sh->success)
+ goto out;
sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
gf_afr_mt_int);
+ if (!sh->sources)
+ goto out;
sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
priv->child_count,
gf_afr_mt_int);
+ if (!sh->locked_nodes)
+ goto out;
+
+ sh->pending_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
+ if (!sh->pending_matrix)
+ goto out;
- sh->pending_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
+ sh->delta_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
+ if (!sh->delta_matrix)
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
+ sh->fresh_parent_dirs = afr_children_create (priv->child_count);
+ if (!sh->fresh_parent_dirs)
+ goto out;
+ ret = afr_sh_common_create (sh, priv->child_count);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
}
- sh->delta_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- for (i = 0; i < priv->child_count; i++) {
- sh->delta_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
+ if (local->self_heal.background) {
+ LOCK (&priv->lock);
+ {
+ if (priv->background_self_heals_started
+ < priv->background_self_heal_count) {
+ priv->background_self_heals_started++;
+
+
+ } else {
+ local->self_heal.background = _gf_false;
+ sh->background = _gf_false;
+ }
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ if (!local->loc.parent) {
+ sh->do_missing_entry_self_heal = _gf_false;
+ sh->do_gfid_self_heal = _gf_false;
}
- sh->success_children = afr_fresh_children_create (priv->child_count);
- sh->fresh_children = afr_fresh_children_create (priv->child_count);
- sh->fresh_parent_dirs = afr_fresh_children_create (priv->child_count);
+ sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
FRAME_SU_DO (sh_frame, afr_local_t);
- if (sh->need_missing_entry_self_heal) {
- afr_self_heal_conflicting_entries (sh_frame, this);
- } else if (sh->need_gfid_self_heal) {
- GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
- afr_self_heal_gfids (sh_frame, this);
+ if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
+ afr_self_heal_missing_entries (sh_frame, this);
} else {
+ loc = &sh_local->loc;
+ if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) {
+ if (!uuid_is_null (inode->gfid))
+ GF_ASSERT (!uuid_compare (inode->gfid,
+ sh->sh_gfid_req));
+ uuid_copy (loc->gfid, sh->sh_gfid_req);
+ }
gf_log (this->name, GF_LOG_TRACE,
"proceeding to metadata check on %s",
local->loc.path);
afr_sh_missing_entries_done (sh_frame, this);
}
+ op_errno = 0;
+out:
+ if (op_errno) {
+ orig_sh->unwind (frame, this, -1, op_errno, 1);
+ if (sh_frame)
+ AFR_STACK_DESTROY (sh_frame);
+ }
return 0;
}
@@ -2123,24 +2456,24 @@ afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
GF_ASSERT (str && (size > strlen (" missing-entry gfid "
"meta-data data entry")));
- if (self_heal_p->need_metadata_self_heal) {
+ if (self_heal_p->do_metadata_self_heal) {
snprintf (str, size, " meta-data");
}
- if (self_heal_p->need_data_self_heal) {
+ if (self_heal_p->do_data_self_heal) {
snprintf (str + strlen(str), size - strlen(str), " data");
}
- if (self_heal_p->need_entry_self_heal) {
+ if (self_heal_p->do_entry_self_heal) {
snprintf (str + strlen(str), size - strlen(str), " entry");
}
- if (self_heal_p->need_missing_entry_self_heal) {
+ if (self_heal_p->do_missing_entry_self_heal) {
snprintf (str + strlen(str), size - strlen(str),
" missing-entry");
}
- if (self_heal_p->need_gfid_self_heal) {
+ if (self_heal_p->do_gfid_self_heal) {
snprintf (str + strlen(str), size - strlen(str), " gfid");
}
}
@@ -2166,3 +2499,314 @@ afr_self_heal_type_for_transaction (afr_transaction_type type)
}
return sh_type;
}
+
+/*
+ * Fill in @child as the loc of the entry called @name inside the
+ * directory described by @parent.
+ *
+ * The parent gfid is taken from parent->inode->gfid, falling back to
+ * parent->gfid; if both are null the function fails.  On success
+ * child->path is a newly allocated "<parent path>/<name>" string,
+ * child->name points at its last component, child->parent holds a
+ * reference on parent->inode, child->inode is a new inode from the
+ * parent's inode table and child->pargfid is the parent gfid.
+ *
+ * Returns 0 on success and -1 on failure.  On failure (with a non-NULL
+ * @child) loc_wipe() is called on @child.
+ * NOTE(review): because of that loc_wipe(), callers presumably pass a
+ * zero-initialised loc -- confirm against the callers.
+ */
+int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+{
+        int     ret = -1;
+        uuid_t  pargfid = {0};
+
+        if (!child)
+                goto out;
+
+        if (!uuid_is_null (parent->inode->gfid))
+                uuid_copy (pargfid, parent->inode->gfid);
+        else if (!uuid_is_null (parent->gfid))
+                uuid_copy (pargfid, parent->gfid);
+
+        if (uuid_is_null (pargfid))
+                goto out;
+
+        if (strcmp (parent->path, "/") == 0)
+                ret = gf_asprintf ((char **)&child->path, "/%s", name);
+        else
+                ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
+                                   name);
+
+        if (-1 == ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "asprintf failed while setting child path");
+                /* child->path was not set; it must not reach strrchr() */
+                goto out;
+        }
+
+        child->name = strrchr (child->path, '/');
+        if (child->name)
+                child->name++;
+
+        child->parent = inode_ref (parent->inode);
+        child->inode = inode_new (parent->inode->table);
+        uuid_copy (child->pargfid, pargfid);
+
+        if (!child->inode) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if ((ret == -1) && child)
+                loc_wipe (child);
+
+        return ret;
+}
+
+/*
+ * Wind an xattrop to every child that has an entry in sh->xattr[], using
+ * the delta matrix computed from the pending xattrs for transaction
+ * @type.
+ *
+ * @cbk is the callback given to each wind (cookie = child index).
+ * @finish is called directly when there is nothing to wind
+ * (call_count == 0) or when setting up the xattr dicts fails; in the
+ * failure case the self-heal status is first set to
+ * AFR_SELF_HEAL_FAILED.
+ *
+ * The function itself always returns 0.
+ */
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this))
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ dict_t **erase_xattr = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
+ sh->success, priv->child_count, type);
+
+ /* one dict slot per child; slots stay NULL for children without xattr */
+ erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!erase_xattr)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i]) {
+ call_count++;
+ erase_xattr[i] = dict_new ();
+ if (!erase_xattr[i])
+ goto out;
+ }
+ }
+
+ afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr,
+ priv->child_count, type);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ afr_sh_print_pending_matrix (sh->delta_matrix, this);
+ local->call_count = call_count;
+ /* nothing to wind: hand control straight to the finish handler */
+ if (call_count == 0) {
+ ret = 0;
+ finish (frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!erase_xattr[i])
+ continue;
+
+ if (sh->healing_fd) {//true for ENTRY, reg file DATA transaction
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ sh->healing_fd,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ }
+ }
+
+ ret = 0;
+out:
+ /* drop this function's references on the dicts and free the array */
+ if (erase_xattr) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (erase_xattr[i]) {
+ dict_unref (erase_xattr[i]);
+ }
+ }
+ }
+
+ GF_FREE (erase_xattr);
+
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ finish (frame, this);
+ }
+
+ return 0;
+}
+
+/*
+ * Store @status as the status of the self-heal type currently in action
+ * (sh->sh_type_in_action) inside sh->afr_all_sh_status.
+ *
+ * A NULL @sh, or a type of AFR_SELF_HEAL_INVALID, is logged at ERROR
+ * level and nothing is recorded.
+ */
+void
+afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
+{
+        xlator_t                   *this      = NULL;
+        afr_sh_status_for_all_type *sh_status = NULL;
+
+        this = THIS;
+
+        /* validate before touching any member of sh */
+        if (!sh) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+                                  "structure");
+                goto out;
+        }
+
+        sh_status = &(sh->afr_all_sh_status);
+
+        switch (sh->sh_type_in_action) {
+        case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+                sh_status->gfid_or_missing_entry_self_heal = status;
+                break;
+        case AFR_SELF_HEAL_METADATA:
+                sh_status->metadata_self_heal = status;
+                break;
+        case AFR_SELF_HEAL_DATA:
+                sh_status->data_self_heal = status;
+                break;
+        case AFR_SELF_HEAL_ENTRY:
+                sh_status->entry_self_heal = status;
+                break;
+        case AFR_SELF_HEAL_INVALID:
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid "
+                                  "self heal type in action");
+                break;
+        }
+out:
+        return;
+}
+
+/*
+ * Flag @local as unhealable and record AFR_SELF_HEAL_FAILED for the
+ * self-heal type currently in action on it.
+ */
+void
+afr_set_local_for_unhealable (afr_local_t *local)
+{
+        local->unhealable = 1;
+        afr_set_self_heal_status (&local->self_heal, AFR_SELF_HEAL_FAILED);
+}
+
+/*
+ * Tell whether self-heal has failed.
+ *
+ * AFR_CHECK_ALL:      failed if any of the gfid/missing-entry, metadata,
+ *                     data or entry statuses is AFR_SELF_HEAL_FAILED.
+ * AFR_CHECK_SPECIFIC: failed if the status of the type currently in
+ *                     action (sh->sh_type_in_action) is
+ *                     AFR_SELF_HEAL_FAILED; AFR_SELF_HEAL_INVALID is
+ *                     treated as "not attempted".
+ *
+ * A NULL @sh is logged at ERROR level and reported as failed.
+ * Returns 1 when failed, 0 otherwise.
+ */
+int
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
+{
+        const afr_sh_status_for_all_type *sh_status = NULL;
+        afr_self_heal_status              status    = AFR_SELF_HEAL_FAILED;
+        xlator_t                         *this      = NULL;
+        int                               sh_failed = 0;
+
+        this = THIS;
+
+        /* validate before reading any member of sh */
+        if (!sh) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+                                  "structure");
+                sh_failed = 1;
+                goto out;
+        }
+
+        sh_status = &sh->afr_all_sh_status;
+
+        if (type == AFR_CHECK_ALL) {
+                if ((sh_status->gfid_or_missing_entry_self_heal
+                     == AFR_SELF_HEAL_FAILED)
+                    || (sh_status->metadata_self_heal == AFR_SELF_HEAL_FAILED)
+                    || (sh_status->data_self_heal == AFR_SELF_HEAL_FAILED)
+                    || (sh_status->entry_self_heal == AFR_SELF_HEAL_FAILED))
+                        sh_failed = 1;
+        } else if (type == AFR_CHECK_SPECIFIC) {
+                /* status stays FAILED if the type matches no case below */
+                switch (sh->sh_type_in_action) {
+                case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+                        status = sh_status->gfid_or_missing_entry_self_heal;
+                        break;
+                case AFR_SELF_HEAL_METADATA:
+                        status = sh_status->metadata_self_heal;
+                        break;
+                case AFR_SELF_HEAL_ENTRY:
+                        status = sh_status->entry_self_heal;
+                        break;
+                case AFR_SELF_HEAL_DATA:
+                        status = sh_status->data_self_heal;
+                        break;
+                case AFR_SELF_HEAL_INVALID:
+                        status = AFR_SELF_HEAL_NOT_ATTEMPTED;
+                        break;
+                }
+                if (status == AFR_SELF_HEAL_FAILED)
+                        sh_failed = 1;
+        }
+
+out:
+        return sh_failed;
+}
+
+/*
+ * Map a self-heal status to the phrase appended to a self-heal type in
+ * log messages.  Values not handled below yield " has unknown status".
+ * The returned string is a literal and must not be freed or modified.
+ */
+char *
+get_sh_completion_status (afr_self_heal_status status)
+{
+        switch (status) {
+        case AFR_SELF_HEAL_NOT_ATTEMPTED:
+                return " is not attempted";
+        case AFR_SELF_HEAL_FAILED:
+                return " failed";
+        case AFR_SELF_HEAL_STARTED:
+                return " is started";
+        case AFR_SELF_HEAL_SYNC_BEGIN:
+                return " is successfully completed";
+        }
+
+        return " has unknown status";
+}
+
+/*
+ * Emit one log line, at level @loglvl, summarising the status of every
+ * self-heal type recorded in local->self_heal for local->loc.path.
+ *
+ * The summary is assembled in a local buffer by the ADD_FMT_STRING /
+ * ADD_FMT_STRING_SYNC macros, which also set print_log; nothing is
+ * logged when print_log stays 0.  When data or metadata self-heal is in
+ * AFR_SELF_HEAL_SYNC_BEGIN state and the matching *_sh_info string is
+ * non-empty, that string is appended to the message.
+ */
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t loglvl)
+{
+        char                        sh_log[4096] = {0};
+        afr_self_heal_t            *sh           = &local->self_heal;
+        afr_sh_status_for_all_type  all_status   = sh->afr_all_sh_status;
+        xlator_t                   *this         = NULL;
+        size_t                      off          = 0;
+        int                         data_sh      = 0;
+        int                         metadata_sh  = 0;
+        int                         print_log    = 0;
+
+        this = THIS;
+
+        ADD_FMT_STRING (sh_log, off, "gfid or missing entry",
+                        all_status.gfid_or_missing_entry_self_heal, print_log);
+        ADD_FMT_STRING_SYNC (sh_log, off, "metadata",
+                             all_status.metadata_self_heal, print_log);
+        if (sh->background) {
+                ADD_FMT_STRING_SYNC (sh_log, off, "background data",
+                                     all_status.data_self_heal, print_log);
+        } else {
+                ADD_FMT_STRING_SYNC (sh_log, off, "foreground data",
+                                     all_status.data_self_heal, print_log);
+        }
+        ADD_FMT_STRING_SYNC (sh_log, off, "entry", all_status.entry_self_heal,
+                             print_log);
+
+        /* test the info strings for NULL before handing them to strcmp */
+        if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.data_self_heal &&
+            sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+                data_sh = 1;
+        if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.metadata_self_heal &&
+            sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+                metadata_sh = 1;
+
+        if (!print_log)
+                return;
+
+        gf_log (this->name, loglvl, "%s %s %s on %s", sh_log,
+                ((data_sh == 1) ? sh->data_sh_info : ""),
+                ((metadata_sh == 1) ? sh->metadata_sh_info : ""),
+                local->loc.path);
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index 043ebea2d..473264776 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -1,74 +1,58 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_COMMON_H__
#define __AFR_SELF_HEAL_COMMON_H__
#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
+#define AFR_SH_MIN_PARTICIPANTS 2
typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
- AFR_SELF_HEAL_INVALID = -1,
-} afr_self_heal_type;
+ AFR_LOOKUP_FAIL_CONFLICTS = 1,
+ AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
+} afr_lookup_flags_t;
-typedef int
-(*afr_lookup_cbk_t) (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent);
int
afr_sh_select_source (int sources[], int child_count);
int
-afr_sh_sink_count (int sources[], int child_count);
-
-int
afr_sh_source_count (int sources[], int child_count);
void
afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
+void
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc);
+
int
afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
dict_t *xattr[], afr_transaction_type type,
size_t child_count);
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type);
int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name);
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status);
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type);
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count);
-
void
afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
size_t size);
@@ -77,22 +61,84 @@ afr_self_heal_type
afr_self_heal_type_for_transaction (afr_transaction_type type);
int
-afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs,
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type);
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant);
void
afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
+
+void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc);
+
int
afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid);
+ afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
+ int32_t flags, dict_t *xdata);
int
afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf);
+ int active_src, struct iatt *buf,
+ struct iatt *parentbuf);
int
afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
char *base_name, afr_lock_cbk_t lock_cbk);
int
afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent);
+ int child_index);
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
+ afr_lock_cbk_t lock_cbk);
+afr_local_t *
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this);
+int
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len, gf_boolean_t block, char *dom,
+ afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler);
+void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
+typedef int
+(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata);
+int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
+int
+afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
+ int active_source, call_frame_t **impunge_frame);
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this);
+
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+ int *intersection, unsigned int child_count);
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+ struct iatt *bufs);
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this));
+
+void
+afr_set_local_for_unhealable (afr_local_t *local);
+
+int
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type);
+
+void
+afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status);
+
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl);
+
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this);
#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 74a1bf353..9de26ee56 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -49,31 +40,35 @@
#include "afr-self-heal-common.h"
#include "afr-self-heal-algorithm.h"
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this);
+
+/*
+ * Data self-heal may proceed only when at least AFR_SH_MIN_PARTICIPANTS
+ * children are counted in @success_count.
+ */
+static inline gf_boolean_t
+afr_sh_data_proceed (unsigned int success_count)
+{
+        if (success_count < AFR_SH_MIN_PARTICIPANTS)
+                return 0;
+
+        return 1;
+}
+
+extern int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
+
+int
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
+
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
+
+int
+afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
int
afr_sh_data_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- /*
- TODO: cleanup sh->*
- */
-
- if (sh->healing_fd && !sh->healing_fd_opened) {
- /* unref only if we created the fd ourselves */
-
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
- /* for (i = 0; i < priv->child_count; i++) */
- /* sh->locked_nodes[i] = 0; */
sh->completion_cbk (frame, this);
@@ -83,7 +78,7 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this)
int
afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -96,8 +91,8 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "flush or setattr failed on %s on subvolume %s: %s",
+ gf_log (this->name, GF_LOG_ERROR,
+ "flush failed on %s on subvolume %s: %s",
local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
}
@@ -113,18 +108,6 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
-int
-afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- afr_sh_data_flush_cbk (frame, cookie, this, op_ret, op_errno);
-
- return 0;
-}
-
-
int
afr_sh_data_close (call_frame_t *frame, xlator_t *this)
{
@@ -133,66 +116,29 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
afr_self_heal_t *sh = NULL;
int i = 0;
int call_count = 0;
- int source = 0;
- int32_t valid = 0;
- struct iatt stbuf = {0,};
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- source = sh->source;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- if (sh->healing_fd_opened) {
- /* not our job to close the fd */
-
- afr_sh_data_done (frame, this);
- return 0;
- }
-
if (!sh->healing_fd) {
+ //This happens when file is non-reg
afr_sh_data_done (frame, this);
return 0;
}
-
- call_count = (sh->active_sinks + 1) * 2;
+ call_count = afr_set_elem_count_get (sh->success,
+ priv->child_count);
local->call_count = call_count;
- /* closed source */
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd of %s on %s",
- local->loc.path, priv->children[sh->source]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->flush,
- sh->healing_fd);
- call_count--;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->setattr,
- &local->loc, &stbuf, valid);
-
- call_count--;
-
- if (call_count == 0)
+ if (call_count == 0) {
+ afr_sh_data_done (frame, this);
return 0;
+ }
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
+ if (!sh->success[i])
continue;
-
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"closing fd of %s on %s",
local->loc.path, priv->children[i]->name);
@@ -200,15 +146,7 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- sh->healing_fd);
-
- call_count--;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+ sh->healing_fd, NULL);
if (!--call_count)
break;
@@ -217,28 +155,46 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
return 0;
}
+/*
+ * Release the lock held in the self-heal domain (priv->sh_domain), if
+ * any, and then continue with afr_sh_data_close().  When no such lock is
+ * held, afr_sh_data_close() is called directly.  Always returns 0.
+ */
+int
+afr_sh_dom_unlock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = frame->local;
+        afr_self_heal_t *sh    = &local->self_heal;
+        afr_private_t   *priv  = this->private;
+
+        if (!sh->sh_dom_lock_held) {
+                afr_sh_data_close (frame, this);
+                return 0;
+        }
+
+        afr_sh_data_unlock (frame, this, priv->sh_domain, afr_sh_data_close);
+        return 0;
+}
int
-afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
+
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
local = frame->local;
+ priv = this->private;
LOCK (&frame->lock);
{
if (op_ret == -1) {
gf_log (this->name, GF_LOG_INFO,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
+ "setattr failed on %s on subvolume %s: %s",
+ local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
}
}
UNLOCK (&frame->lock);
@@ -246,38 +202,145 @@ afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_sh_data_close (frame, this);
+ afr_sh_data_finish (frame, this);
}
return 0;
}
+/*
+ * Wind a setattr carrying the atime/mtime from @stbuf to every child
+ * marked in sh->success[]; replies arrive in afr_sh_data_setattr_cbk
+ * with the child index as cookie.
+ *
+ * If no child is marked (asserted not to happen) the function goes
+ * straight to afr_sh_data_finish().  Always returns 0.
+ */
+int
+afr_sh_data_setattr (call_frame_t *frame, xlator_t *this, struct iatt* stbuf)
+{
+        afr_local_t     *local     = frame->local;
+        afr_self_heal_t *sh        = &local->self_heal;
+        afr_private_t   *priv      = this->private;
+        int32_t          valid     = (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
+        int              remaining = 0;
+        int              i         = 0;
+
+        remaining = afr_set_elem_count_get (sh->success, priv->child_count);
+        local->call_count = remaining;
+
+        if (remaining == 0) {
+                GF_ASSERT (0);
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->success[i]) {
+                        STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
+                                           (void *) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->setattr,
+                                           &local->loc, stbuf, valid, NULL);
+
+                        /* stop right after the last expected wind */
+                        if (--remaining == 0)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Callback of the fstat wound on the source child by
+ * afr_sh_set_timestamps().  On success the returned iatt is stored in
+ * sh->buf[] for that child and passed to afr_sh_data_setattr(); on
+ * failure the error is logged and the data self-heal is failed.
+ * Always returns 0.
+ */
+int
+afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+                               struct iatt *buf, dict_t *xdata)
+{
+        afr_local_t     *local       = frame->local;
+        afr_self_heal_t *sh          = &local->self_heal;
+        int              child_index = (long) cookie;
+
+        GF_ASSERT (sh->source == child_index);
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+                        "time-stamps after self-heal", local->loc.path);
+                afr_sh_data_fail (frame, this);
+                return 0;
+        }
+
+        sh->buf[child_index] = *buf;
+        afr_sh_data_setattr (frame, this, buf);
+
+        return 0;
+}
+/*
+ * If there are any writes after the self-heal is triggered then the
+ * stbuf stored in local->self_heal.buf[] will be invalid so we do one more
+ * stat on the source and then set the [am]times
+ */
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
+afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
+ (void *) (long) sh->source,
+ priv->children[sh->source],
+ priv->children[sh->source]->fops->fstat,
+ sh->healing_fd, NULL);
+ return 0;
+}
+
+//Fun fact, lock_cbk is being used for both lock & unlock
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
+ afr_lock_cbk_t lock_cbk)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
sh = &local->self_heal;
+ priv = this->private;
- GF_ASSERT (!sh->data_lock_held);
-
- int_lock->lock_cbk = afr_sh_data_close;
+ if (strcmp (dom, this->name) == 0) {
+ sh->data_lock_held = _gf_false;
+ } else if (strcmp (dom, priv->sh_domain) == 0) {
+ sh->sh_dom_lock_held = _gf_false;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = dom;
afr_unlock (frame, this);
+out:
+ if (ret) {
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_cbk (frame, this);
+ }
return 0;
}
-
int
afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
sh = &local->self_heal;
@@ -285,141 +348,309 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG,
"finishing data selfheal of %s", local->loc.path);
- if (!sh->data_lock_held)
- afr_sh_data_unlock (frame, this);
+ if (sh->data_lock_held)
+ afr_sh_data_unlock (frame, this, this->name, afr_sh_dom_unlock);
else
- afr_sh_data_close (frame, this);
+ afr_sh_dom_unlock (frame, this);
return 0;
}
+/*
+ * Failure exit of data self-heal: record AFR_SELF_HEAL_FAILED for the
+ * self-heal type in action and run the normal afr_sh_data_finish()
+ * path.  Always returns 0.
+ */
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "finishing failed data selfheal of %s", local->loc.path);
+
+        afr_set_self_heal_status (&local->self_heal, AFR_SELF_HEAL_FAILED);
+        afr_sh_data_finish (frame, this);
+
+        return 0;
+}
int
afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
int call_count = 0;
- long i = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child_index = (long) cookie;
- local = frame->local;
priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Erasing of pending change "
+ "log failed on %s for subvol %s, reason: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
- afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
- afr_sh_data_finish (frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ if (!IA_ISREG (sh->type)) {
+ afr_sh_data_finish (frame, this);
+ goto out;
+ }
+ GF_ASSERT (sh->old_loop_frame);
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_post_sh_big_lock_success,
+ afr_post_sh_big_lock_failure);
}
-
+out:
return 0;
}
-
int
afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+ return 0;
+}
+
+int
+afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ sh = &local->self_heal;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to fsync on "
+ "%s - %s", local->loc.path,
+ priv->children[child_index]->name, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ UNLOCK (&frame->lock);
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ }
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_erase_pending (frame, this);
+ }
+ return 0;
+}
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
+/*
+ * Before erasing xattrs, make sure the data is written to disk
+ */
+int
+afr_sh_data_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
+ call_count = sh->active_sinks;
+ if (call_count == 0) {
+ afr_sh_data_erase_pending (frame, this);
+ return 0;
}
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
-
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
+ if (!sh->success[i] || sh->sources[i])
continue;
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
+ STACK_WIND_COOKIE (frame, afr_sh_data_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync,
+ sh->healing_fd, 1, NULL);
+ }
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
+ return 0;
+}
+
+/*
+ * Look up a self-heal algorithm by @name in afr_self_heal_algorithms[],
+ * which is scanned up to the first entry whose name is NULL.
+ * Returns the matching entry, or NULL when @name is NULL or not found.
+ */
+static struct afr_sh_algorithm *
+sh_algo_from_name (xlator_t *this, char *name)
+{
+        struct afr_sh_algorithm *algo = NULL;
+
+        if (name == NULL)
+                return NULL;
+
+        for (algo = &afr_self_heal_algorithms[0]; algo->name; algo++) {
+                if (strcmp (name, algo->name) == 0)
+                        return algo;
+        }
+
+        return NULL;
+}
+
+
+static int
+sh_zero_byte_files_exist (afr_local_t *local, int child_count)
+{
+ int i = 0;
+ int ret = 0;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+ for (i = 0; i < child_count; i++) {
+ if (!local->child_up[i] || sh->child_errno[i])
+ continue;
+ if (sh->buf[i].ia_size == 0) {
+ ret = 1;
break;
+ }
}
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
+ return ret;
+}
+
+
+struct afr_sh_algorithm *
+afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ struct afr_sh_algorithm * algo = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
+
+ if (algo == NULL) {
+ /* option not set, so fall back on heuristics */
+
+ if (sh_zero_byte_files_exist (local, priv->child_count)
+ || (sh->file_size <= (priv->data_self_heal_window_size *
+ this->ctx->page_size))) {
+
+ /*
+ * If the file does not exist on one of the subvolumes,
+ * or a zero-byte file exists (created by entry self-heal)
+ * the entire content has to be copied anyway, so there
+ * is no benefit from using the "diff" algorithm.
+ *
+ * If the file size is about the same as page size,
+ * the entire file can be read and written with a few
+ * (pipelined) STACK_WINDs, which will be faster
+ * than "diff" which has to read checksums and then
+ * read and write.
+ */
+
+ algo = sh_algo_from_name (this, "full");
+
+ } else {
+ algo = sh_algo_from_name (this, "diff");
}
}
- GF_FREE (erase_xattr);
- return 0;
+ return algo;
}
int
+afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ struct afr_sh_algorithm *sh_algo = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->algo_completion_cbk = afr_sh_data_fsync;
+ sh->algo_abort_cbk = afr_sh_data_fail;
+
+ sh_algo = afr_sh_data_pick_algo (frame, this);
+
+ sh->algo = sh_algo;
+ sh_algo->fn (frame, this);
+
+ return 0;
+}
+
+int
afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
int call_count = 0;
int child_index = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- priv = this->private;
+ priv = this->private;
local = frame->local;
+ sh = &local->self_heal;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_INFO,
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
"ftruncate of %s on subvolume %s failed (%s)",
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_TRACE,
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
"ftruncate of %s on subvolume %s completed",
local->loc.path,
priv->children[child_index]->name);
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_sh_data_erase_pending (frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_sync_prepare (frame, this);
}
return 0;
@@ -454,7 +685,8 @@ afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
+ sh->healing_fd, sh->file_size,
+ NULL);
if (!--call_count)
break;
@@ -463,162 +695,304 @@ afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
return 0;
}
+int
+afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ int i = 0;
-static struct afr_sh_algorithm *
-sh_algo_from_name (xlator_t *this, char *name)
+ priv = this->private;
+ sh->source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (sh->source < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ /* detect changes not visible through pending flags -- JIC */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == sh->source || sh->child_errno[i])
+ continue;
+
+ if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[sh->source]))
+ sh->sources[i] = 0;
+ }
+
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+out:
+ return ret;
+}
+
+char*
+afr_get_sizes_str (afr_local_t *local, struct iatt *bufs, xlator_t *this)
{
- int i = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sizes_str = NULL;
+ size_t off = 0;
+ char *fmt_str = "%llu bytes on %s, ";
+ char *child_down = " %s,";
+ char *child_unknown = " %s,";
+ int down_child_present = 0;
+ int down_count = 0;
+ int unknown_count = 0;
+ int unknown_child_present = 0;
+ char *down_subvol_1 = " down subvolume is ";
+ char *unknown_subvol_1 = " unknown subvolume is ";
+ char *down_subvol_2 = " down subvolumes are ";
+ char *unknown_subvol_2 = " unknown subvolumes are ";
- while (afr_self_heal_algorithms[i].name) {
- if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
- return &afr_self_heal_algorithms[i];
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ len += snprintf (num, sizeof (num), fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
+ } else if (local->child_up[i] == 0) {
+ len += snprintf (num, sizeof (num), child_down,
+ priv->children[i]->name);
+ if (!down_child_present)
+ down_child_present = 1;
+ down_count ++;
+ } else if (local->child_up[i] == -1) {
+ len += snprintf (num, sizeof (num), child_unknown,
+ priv->children[i]->name);
+ if (!unknown_child_present)
+ unknown_child_present = 1;
+ unknown_count++;
}
- i++;
}
- return NULL;
-}
+ if (down_child_present) {
+ if (down_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_1);
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_1);
+ }
+ len++;//for '\0'
-static int
-sh_zero_byte_files_exist (afr_self_heal_t *sh, int child_count)
-{
- int i;
- int ret = 0;
+ sizes_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
- for (i = 0; i < child_count; i++) {
- if (sh->buf[i].ia_size == 0) {
- ret = 1;
- break;
+ if (!sizes_str)
+ return NULL;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ off += snprintf (sizes_str + off, len - off, fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
}
}
- return ret;
-}
+ if (down_child_present) {
+ if (down_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_1);
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 0) {
+ off += snprintf (sizes_str + off, len - off, child_down,
+ priv->children[i]->name);
+ }
+ }
-struct afr_sh_algorithm *
-afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_1);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == -1) {
+ off += snprintf (sizes_str + off, len - off,
+ child_unknown,
+ priv->children[i]->name);
+
+ }
+ }
+
+ return sizes_str;
+}
+
+char*
+afr_get_sinks_str (xlator_t *this, afr_local_t *local, afr_self_heal_t *sh)
{
- afr_private_t * priv = NULL;
- struct afr_sh_algorithm * algo = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sinks_str = NULL;
+ char *temp_str = " to sinks ";
+ char *str_format = " %s,";
+ char off = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
+ priv = this->private;
- if (algo == NULL) {
- /* option not set, so fall back on heuristics */
+ len += snprintf (num, sizeof (num), "%s", temp_str);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ len += snprintf (num, sizeof (num), str_format,
+ priv->children[i]->name);
+ }
+ }
- if ((local->enoent_count != 0)
- || sh_zero_byte_files_exist (sh, priv->child_count)
- || (sh->file_size <= (priv->data_self_heal_window_size *
- this->ctx->page_size))) {
+ len ++;
- /*
- * If the file does not exist on one of the subvolumes,
- * or a zero-byte file exists (created by entry self-heal)
- * the entire content has to be copied anyway, so there
- * is no benefit from using the "diff" algorithm.
- *
- * If the file size is about the same as page size,
- * the entire file can be read and written with a few
- * (pipelined) STACK_WINDs, which will be faster
- * than "diff" which has to read checksums and then
- * read and write.
- */
+ sinks_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
- algo = sh_algo_from_name (this, "full");
+ if (!sinks_str)
+ return NULL;
- } else {
- algo = sh_algo_from_name (this, "diff");
+ off += snprintf (sinks_str + off, len - off, "%s", temp_str);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ off += snprintf (sinks_str + off, len - off,
+ str_format,
+ priv->children[i]->name);
}
}
- return algo;
+ return sinks_str;
+
}
-int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+void
+afr_set_data_sh_info_str (afr_local_t *local, afr_self_heal_t *sh, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ char *pending_matrix_str = NULL;
+ char *sizes_str = NULL;
+ char *sinks_str = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
+
+ priv = this->private;
+
+ pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+ this);
+ if (!pending_matrix_str)
+ pending_matrix_str = "";
+
+ sizes_str = afr_get_sizes_str (local, sh->buf, this);
+ if (!sizes_str)
+ sizes_str = "";
+
+ sinks_str = afr_get_sinks_str (this, local, sh);
+ if (!sinks_str)
+ sinks_str = "";
+
+ gf_asprintf (&sh->data_sh_info, " data self heal from %s %s with "
+ "%s data %s", priv->children[sh->source]->name, sinks_str,
+ sizes_str, pending_matrix_str);
+
+ if (pending_matrix_str && strcmp (pending_matrix_str, ""))
+ GF_FREE (pending_matrix_str);
+
+ if (sizes_str && strcmp (sizes_str, ""))
+ GF_FREE (sizes_str);
+}
+
+void
+afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+{
int source = 0;
- int i = 0;
- struct afr_sh_algorithm *sh_algo = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- source = sh->source;
+ source = sh->source;
+ sh->block_size = this->ctx->page_size;
+ sh->file_size = sh->buf[source].ia_size;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
+ if (FILE_HAS_HOLES (&sh->buf[source]))
+ sh->file_has_holes = 1;
+
+ if (sh->background && sh->unwind && !sh->unwound) {
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_SPECIFIC));
+ sh->unwound = _gf_true;
}
- sh->success[source] = 1;
- if (active_sinks == 0) {
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
gf_log (this->name, GF_LOG_INFO,
"no active sinks for performing self-heal on file %s",
local->loc.path);
afr_sh_data_finish (frame, this);
- return 0;
+ return;
}
- sh->active_sinks = active_sinks;
gf_log (this->name, GF_LOG_DEBUG,
"self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[source]->name, active_sinks);
-
- sh->algo_completion_cbk = afr_sh_data_trim_sinks;
- sh->algo_abort_cbk = afr_sh_data_finish;
-
- sh_algo = afr_sh_data_pick_algo (frame, this);
-
- sh_algo->fn (frame, this);
+ local->loc.path, priv->children[sh->source]->name,
+ sh->active_sinks);
- return 0;
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_sh_data_trim_sinks (frame, this);
}
-
int
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_local_t * orig_local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int nsources = 0;
- int source = 0;
- int i = 0;
+ int ret = 0;
+ int *old_sources = NULL;
+ int tstamp_source = 0;
+ int i = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_data_finish (frame, this);
- return 0;
+ gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ if (sh->sync_done) {
+ //store sources before sync so that mtime can be set using the
+ //iatt buf from one of them.
+ old_sources = alloca (priv->child_count*sizeof (*old_sources));
+ memcpy (old_sources, sh->sources,
+ priv->child_count * sizeof (*old_sources));
}
+ nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
+ sh->sources, sh->success_children,
+ AFR_DATA_TRANSACTION, NULL, _gf_true);
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
@@ -636,123 +1010,64 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
}
if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible "
- "split-brain). Please delete the file from all but "
- "the preferred subvolume.", local->loc.path);
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, SPB);
- local->govinda_gOvinda = 1;
-
- afr_sh_data_finish (frame, this);
+ afr_sh_data_fail (frame, this);
return 0;
}
- source = afr_sh_select_source (sh->sources, priv->child_count);
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, NO_SPB);
- if (source == -1) {
+ ret = afr_sh_inode_set_read_ctx (sh, this);
+ if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"No active sources found.");
- afr_sh_data_finish (frame, this);
+ afr_sh_data_fail (frame, this);
return 0;
}
- sh->source = source;
- sh->block_size = 65536; /* TODO: make it configurable or use macro */
- sh->file_size = sh->buf[source].ia_size;
-
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
-
- orig_local = sh->orig_frame->local;
- orig_local->cont.lookup.buf.ia_size = sh->buf[source].ia_size;
-
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
-
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-
- /*
- quick-read might have read the file, so send xattr from
- the source subvolume (http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=815)
- */
-
- dict_unref (orig_local->cont.lookup.xattr);
- if (orig_local->cont.lookup.xattrs)
- orig_local->cont.lookup.xattr = dict_ref (orig_local->cont.lookup.xattrs[sh->source]);
-
- if (sh->background) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
- sh->unwound = _gf_true;
- }
-
- afr_sh_data_sync_prepare (frame, this);
-
- return 0;
-}
-
-static void
-afr_destroy_pending_matrix (int32_t **pending_matrix, int32_t child_count)
-{
- int i = 0;
- GF_ASSERT (child_count > 0);
- if (pending_matrix) {
- for (i = 0; i < child_count; i++) {
- if (pending_matrix[i])
- GF_FREE (pending_matrix[i]);
+ if (sh->sync_done) {
+ /* Perform setattr from one of the old_sources if possible
+ * Because only they have the correct mtime, the new sources
+ * (i.e. old sinks) have mtime from last writev in sync.
+ */
+ tstamp_source = sh->source;
+ for (i = 0; i < priv->child_count; i++) {
+ if (old_sources[i] && sh->sources[i])
+ tstamp_source = i;
}
- GF_FREE (pending_matrix);
- }
-}
-
-static int32_t**
-afr_create_pending_matrix (int32_t child_count)
-{
- gf_boolean_t cleanup = _gf_false;
- int32_t **pending_matrix = NULL;
- int i = 0;
-
- GF_ASSERT (child_count > 0);
+ afr_sh_data_setattr (frame, this, &sh->buf[tstamp_source]);
+ } else {
+ afr_set_data_sh_info_str (local, sh, this);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No self-heal needed for %s",
+ local->loc.path);
- pending_matrix = GF_CALLOC (sizeof (*pending_matrix), child_count,
- gf_afr_mt_int32_t);
- if (NULL == pending_matrix)
- goto out;
- for (i = 0; i < child_count; i++) {
- pending_matrix[i] = GF_CALLOC (sizeof (**pending_matrix),
- child_count,
- gf_afr_mt_int32_t);
- if (NULL == pending_matrix[i]) {
- cleanup = _gf_true;
- goto out;
+ afr_sh_data_finish (frame, this);
+ return 0;
}
+
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ afr_sh_data_fix (frame, this);
+ else
+ afr_sh_data_finish (frame, this);
}
-out:
- if (_gf_true == cleanup) {
- afr_destroy_pending_matrix (pending_matrix, child_count);
- pending_matrix = NULL;
- }
- return pending_matrix;
+ return 0;
}
int
afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
dict_t **xattr,
- afr_transaction_type txn_type)
+ afr_transaction_type txn_type,
+ uuid_t gfid)
{
afr_private_t *priv = NULL;
int read_child = -1;
- int ret = -1;
int32_t **pending_matrix = NULL;
int32_t *sources = NULL;
int32_t *success_children = NULL;
@@ -760,28 +1075,41 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
int32_t nsources = 0;
int32_t prev_read_child = -1;
int32_t config_read_child = -1;
- afr_self_heal_t *sh = NULL;
+ int32_t subvol_status = 0;
priv = this->private;
bufs = local->cont.lookup.bufs;
success_children = local->cont.lookup.success_children;
- sh = &local->self_heal;
-
- pending_matrix = afr_create_pending_matrix (priv->child_count);
- if (NULL == pending_matrix)
- goto out;
- sources = GF_CALLOC (sizeof (*sources), priv->child_count,
- gf_afr_mt_int32_t);
- if (NULL == sources)
- goto out;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
- sources, success_children, txn_type);
- if (nsources < 0) {
- ret = -1;
- goto out;
+ sources, success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: Possible split-brain",
+ local->loc.path);
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ local->cont.lookup.possible_spb = _gf_true;
+ nsources = 1;
+ sources[success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child (this,
+ success_children,
+ bufs);
+ sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ break;
+ }
}
+ if (nsources < 0)
+ goto out;
prev_read_child = local->read_child_index;
config_read_child = priv->read_child;
@@ -789,23 +1117,18 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
priv->child_count,
prev_read_child,
config_read_child,
- sources);
- ret = 0;
- local->cont.lookup.sources = sources;
+ sources,
+ priv->hash_mode, gfid);
out:
- afr_destroy_pending_matrix (pending_matrix, priv->child_count);
- if (-1 == ret) {
- if (sources)
- GF_FREE (sources);
- }
- gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d", read_child);
+ gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
+ read_child);
return read_child;
}
int
afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -828,6 +1151,12 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
sh->buf[child_index] = *buf;
sh->success_children[sh->success_count] = child_index;
sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "%s: fstat failed "
+ "on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
}
}
UNLOCK (&frame->lock);
@@ -835,9 +1164,20 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_sh_data_fix (frame, this);
+ /* Previous versions of glusterfs might have set
+ * the pending data xattrs which need to be erased
+ */
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "inspecting metadata "
+ "succeeded on < %d children, aborting "
+ "self-heal for %s", AFR_SH_MIN_PARTICIPANTS,
+ local->loc.path);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ afr_sh_data_fxattrop_fstat_done (frame, this);
}
-
+out:
return 0;
}
@@ -848,46 +1188,52 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
afr_self_heal_t *sh = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
+ int call_count = 0;
+ int i = 0;
+ int child = 0;
+ int32_t *fstat_children = NULL;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
+ fstat_children = memdup (sh->success_children,
+ sizeof (*fstat_children) * priv->child_count);
+ if (!fstat_children) {
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ call_count = sh->success_count;
local->call_count = call_count;
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
afr_reset_children (sh->success_children, priv->child_count);
sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fstat,
- sh->healing_fd);
-
- if (!--call_count)
- break;
- }
+ child = fstat_children[i];
+ if (child == -1)
+ break;
+ STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
+ (void *) (long) child,
+ priv->children[child],
+ priv->children[child]->fops->fstat,
+ sh->healing_fd, NULL);
+ --call_count;
}
-
+ GF_ASSERT (!call_count);
+out:
+ GF_FREE (fstat_children);
return 0;
}
-
-int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+void
+afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- int call_count = -1;
int child_index = (long) cookie;
local = frame->local;
@@ -903,16 +1249,46 @@ afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
priv->children[child_index]->name);
sh->xattr[child_index] = dict_ref (xattr);
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "fxattrop of %s "
+ "failed on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
}
}
UNLOCK (&frame->lock);
+}
- call_count = afr_frame_return (frame);
+int
+afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
+ op_errno, xattr);
+
+ call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s, inspecting "
+ "change log succeeded on < %d children",
+ local->loc.path, AFR_SH_MIN_PARTICIPANTS);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
afr_sh_data_fstat (frame, this);
}
-
+out:
return 0;
}
@@ -923,46 +1299,61 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
afr_self_heal_t *sh = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
+ dict_t **xattr_req;
int32_t *zero_pending = NULL;
int call_count = 0;
int i = 0;
int ret = 0;
+ int j;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
local->call_count = call_count;
- xattr_req = dict_new();
- if (!xattr_req) {
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
- gf_afr_mt_int32_t);
- if (!zero_pending) {
- ret = -1;
- goto out;
- }
- ret = dict_set_dynptr (xattr_req, priv->pending_key[i],
- zero_pending,
- 3 * sizeof (*zero_pending));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value");
- goto out;
- } else {
- zero_pending = NULL;
- }
- }
-
+ xattr_req = GF_CALLOC(priv->child_count, sizeof(struct dict_t *),
+ gf_afr_mt_dict_t);
+ if (!xattr_req)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ xattr_req[i] = dict_new();
+ if (!xattr_req[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
+ gf_afr_mt_int32_t);
+ if (!zero_pending) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynptr (xattr_req[i], priv->pending_key[j],
+ zero_pending,
+ 3 * sizeof (*zero_pending));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value");
+ goto out;
+ } else {
+ zero_pending = NULL;
+ }
+ }
+ }
+
+ afr_reset_xattr (sh->xattr, priv->child_count);
+ afr_reset_children (sh->success_children, priv->child_count);
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
+ sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
@@ -970,7 +1361,7 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
sh->healing_fd, GF_XATTROP_ADD_ARRAY,
- xattr_req);
+ xattr_req[i], NULL);
if (!--call_count)
break;
@@ -978,24 +1369,53 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
}
out:
- if (xattr_req)
- dict_unref (xattr_req);
+ if (xattr_req) {
+ for (i = 0; i < priv->child_count; i++)
+ if (xattr_req[i])
+ dict_unref(xattr_req[i]);
+ GF_FREE(xattr_req);
+ }
if (ret) {
- if (zero_pending)
- GF_FREE (zero_pending);
- sh->op_failed = 1;
- afr_sh_data_done (frame, this);
+ GF_FREE (zero_pending);
+ afr_sh_data_fail (frame, this);
}
return 0;
}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this);
+afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->data_lock_held = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
+
+int
+afr_sh_dom_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_dom_lock_held = _gf_true;
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_sh_data_big_lock_success,
+ afr_sh_data_fail);
+ return 0;
+}
int
-afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -1006,22 +1426,25 @@ afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking data inodelks "
- "failed for %s.", local->loc.path);
- sh->op_failed = 1;
- afr_sh_data_done (frame, this);
+ gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ sh->data_lock_failure_handler (frame, this);
} else {
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "done for %s. Proceeding to FOP", local->loc.path);
- afr_sh_data_fxattrop (frame, this);
+ gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
+ "done for %s by %s. Proceding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ sh->data_lock_success_handler (frame, this);
}
return 0;
}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this)
+afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -1031,49 +1454,114 @@ afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
sh = &local->self_heal;
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ if (!sh->data_lock_block) {
+ sh->data_lock_failure_handler(frame, this);
+ } else {
+ int_lock->lock_cbk =
+ afr_sh_data_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
+ }
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "done for %s by %s. Proceeding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+ sh->data_lock_success_handler (frame, this);
+ }
+
+ return 0;
+}
+
+int
+afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, char *dom,
+ off_t start, off_t len)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK;
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = 0;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
+ int_lock->domain = dom;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->flock.l_start = start;
+ inodelk->flock.l_len = len;
+ inodelk->flock.l_type = F_WRLCK;
+
afr_nonblocking_inodelk (frame, this);
return 0;
}
+int
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ sh->data_lock_held = _gf_true;
+ sh->sync_done = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
+
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_set_timestamps (frame, this);
+ return 0;
+}
+
int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this)
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len, gf_boolean_t block,
+ char *dom, afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
afr_self_heal_t * sh = NULL;
-
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- if (sh->data_lock_held) {
- /* caller has held the lock already,
- so skip locking */
- afr_sh_data_fxattrop (frame, this);
- return 0;
- }
-
- return afr_sh_data_lock_rec (frame, this);
+ sh->data_lock_success_handler = success_handler;
+ sh->data_lock_failure_handler = failure_handler;
+ sh->data_lock_block = block;
+ return afr_sh_data_lock_rec (frame, this, dom, start, len);
}
-
int
afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -1099,21 +1587,21 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "open of %s succeeded on child %s",
+ local->loc.path,
+ priv->children[child_index]->name);
}
-
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s succeeded on child %s",
- local->loc.path,
- priv->children[child_index]->name);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_data_fail (frame, this);
return 0;
}
@@ -1121,7 +1609,8 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"fd for %s opened, commencing sync",
local->loc.path);
- afr_sh_data_lock (frame, this);
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, priv->sh_domain,
+ afr_sh_dom_lock_success, afr_sh_data_fail);
}
return 0;
@@ -1142,14 +1631,7 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (sh->healing_fd_opened) {
- /* caller has opened the fd for us already, so skip open */
-
- afr_sh_data_lock (frame, this);
- return 0;
- }
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
fd = fd_create (local->loc.inode, frame->root->pid);
@@ -1165,7 +1647,7 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->open,
&local->loc,
- O_RDWR|O_LARGEFILE, fd, 0);
+ O_RDWR|O_LARGEFILE, fd, NULL);
if (!--call_count)
break;
@@ -1174,19 +1656,93 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
return 0;
}
+void
+afr_sh_non_reg_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ if (op_ret < 0) {
+ afr_sh_data_fail (frame, this);
+ return;
+ }
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count ; i++) {
+ if (1 == local->child_up[i])
+ sh->success[i] = 1;
+ }
+
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+}
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this)
+afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
local = frame->local;
sh = &local->self_heal;
+ sh->data_lock_held = _gf_true;
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_non_reg_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ return 0;
+}
+
+gf_boolean_t
+afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ return _gf_true;
+ return _gf_false;
+}
- if (sh->need_data_self_heal && priv->data_self_heal) {
- afr_sh_data_open (frame, this);
+int
+afr_self_heal_data (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = this->private;
+ int ret = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
+
+ if (afr_can_start_data_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[1],
+ priv->sh_domain, priv->child_count);
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
+
+ if (IA_ISREG (sh->type)) {
+ afr_sh_data_open (frame, this);
+ } else {
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true,
+ this->name,
+ afr_sh_non_reg_lock_success,
+ afr_sh_data_fail);
+ }
} else {
gf_log (this->name, GF_LOG_TRACE,
"not doing data self heal on %s",
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 556ea8027..53491a1d7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -49,28 +40,26 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\
+ do {\
+ _local = _frame->local;\
+ _sh = &_local->self_heal;\
+ _sh_frame = _sh->sh_frame;\
+ _sh_local = _sh_frame->local;\
+ _sh_sh = &_sh_local->self_heal;\
+ } while (0);
+
+int
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index);
int
afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- /*
- TODO: cleanup sh->*
- */
-
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
-
- /* for (i = 0; i < priv->child_count; i++) { */
- /* sh->locked_nodes[i] = 0; */
- /* } */
sh->completion_cbk (frame, this);
@@ -113,7 +102,7 @@ afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
int
afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
long i = 0;
int call_count = 0;
@@ -130,7 +119,7 @@ afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
i = (long)cookie;
- afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);
+ afr_children_add_child (sh->fresh_children, i, priv->child_count);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_INFO,
"%s: failed to erase pending xattrs on %s (%s)",
@@ -168,66 +157,20 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
- int need_unwind = 0;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- if (call_count == 0)
- need_unwind = 1;
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
+ if (sh->entries_skipped) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ goto out;
}
- GF_FREE (erase_xattr);
-
- if (need_unwind)
- afr_sh_entry_finish (frame, this);
-
+ afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION,
+ afr_sh_entry_erase_pending_cbk,
+ afr_sh_entry_finish);
+ return 0;
+out:
+ afr_sh_entry_finish (frame, this);
return 0;
}
@@ -308,57 +251,11 @@ next_active_sink (call_frame_t *frame, xlator_t *this,
return next_active_sink;
}
-
-int
-build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
-{
- int ret = -1;
-
- if (!child) {
- goto out;
- }
-
- if (strcmp (parent->path, "/") == 0)
- ret = gf_asprintf ((char **)&child->path, "/%s", name);
- else
- ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
- name);
-
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
- }
-
- if (!child->path) {
- goto out;
- }
-
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
-
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
-
- if (!child->inode) {
- goto out;
- }
-
- ret = 0;
-out:
- if (ret == -1)
- loc_wipe (child);
-
- return ret;
-}
-
-
int
afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this);
int
afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
@@ -386,7 +283,8 @@ int
afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *expunge_local = NULL;
@@ -422,19 +320,17 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *expunge_local = NULL;
afr_self_heal_t *expunge_sh = NULL;
int active_src = 0;
- call_frame_t *frame = NULL;
int32_t valid = 0;
priv = this->private;
expunge_local = expunge_frame->local;
expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
active_src = (long) cookie;
@@ -452,7 +348,6 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
}
valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- afr_build_parent_loc (&expunge_sh->parent_loc, &expunge_local->loc);
STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
(void *) (long) active_src,
@@ -460,7 +355,7 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
priv->children[active_src]->fops->setattr,
&expunge_sh->parent_loc,
&expunge_sh->parentbuf,
- valid);
+ valid, NULL);
return 0;
}
@@ -484,7 +379,7 @@ afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->unlink,
- &expunge_local->loc);
+ &expunge_local->loc, 0, NULL);
return 0;
}
@@ -509,7 +404,7 @@ afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->rmdir,
- &expunge_local->loc, 1);
+ &expunge_local->loc, 1, NULL);
return 0;
}
@@ -517,7 +412,8 @@ afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
int
afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf)
+ int active_src, struct iatt *buf,
+ struct iatt *parentbuf)
{
afr_private_t *priv = NULL;
afr_local_t *expunge_local = NULL;
@@ -526,6 +422,7 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
int type = 0;
afr_self_heal_t *sh = NULL;
afr_local_t *local = NULL;
+ loc_t *loc = NULL;
priv = this->private;
expunge_local = expunge_frame->local;
@@ -533,8 +430,11 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
frame = expunge_sh->sh_frame;
local = frame->local;
sh = &local->self_heal;
+ loc = &expunge_local->loc;
type = buf->ia_type;
+ if (loc->parent && uuid_is_null (loc->parent->gfid))
+ uuid_copy (loc->pargfid, parentbuf->ia_gfid);
switch (type) {
case IA_IFSOCK:
@@ -598,7 +498,8 @@ afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
goto out;
}
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf);
+ afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf,
+ postparent);
return 0;
out:
@@ -627,7 +528,7 @@ afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->lookup,
- &expunge_local->loc, 0);
+ &expunge_local->loc, NULL);
return 0;
}
@@ -679,7 +580,8 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
if (need_expunge) {
gf_log (this->name, GF_LOG_INFO,
- "missing entry %s on %s",
+ "Entry %s is missing on %s and deleting from "
+ "replica's other bricks",
expunge_local->loc.path,
priv->children[source]->name);
@@ -711,6 +613,19 @@ out:
return 0;
}
+static gf_boolean_t
+can_skip_entry_self_heal (char *name, loc_t *parent_loc)
+{
+ if (strcmp (name, ".") == 0) {
+ return _gf_true;
+ } else if (strcmp (name, "..") == 0) {
+ return _gf_true;
+ } else if (loc_is_root (parent_loc) &&
+ (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
int
afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
@@ -738,21 +653,13 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
sh->expunge_done = afr_sh_entry_expunge_entry_done;
name = entry->d_name;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
+ if (can_skip_entry_self_heal (name, &local->loc)) {
op_ret = 0;
goto out;
}
gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
+ "inspecting existence of %s under %s",
name, local->loc.path);
expunge_frame = copy_frame (frame);
@@ -761,15 +668,17 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
goto out;
}
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
expunge_frame->local = expunge_local;
expunge_sh = &expunge_local->self_heal;
expunge_sh->sh_frame = frame;
expunge_sh->active_source = active_src;
expunge_sh->entrybuf = entry->d_stat;
+ loc_copy (&expunge_sh->parent_loc, &local->loc);
- ret = build_child_loc (this, &expunge_local->loc, &local->loc, name);
+ ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
+ name);
if (ret != 0) {
op_errno = EINVAL;
goto out;
@@ -784,7 +693,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
(void *) (long) source,
priv->children[source],
priv->children[source]->fops->lookup,
- &expunge_local->loc, 0);
+ &expunge_local->loc, NULL);
ret = 0;
out:
@@ -799,7 +708,7 @@ int
afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -867,7 +776,7 @@ afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
priv->children[active_src],
priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
return 0;
}
@@ -897,7 +806,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_sink (frame, this, sh->active_source);
sh->active_source = active_src;
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
goto out;
}
@@ -922,48 +831,58 @@ out:
int
afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_ret, int32_t op_errno)
{
int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ sh->entries_skipped = _gf_true;
call_count = afr_frame_return (frame);
-
if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ afr_sh_entry_impunge_subvol (frame, this);
return 0;
}
+void
+afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+
+ AFR_STACK_DESTROY (impunge_frame);
+ sh->impunge_done (frame, this, op_ret, op_errno);
+}
int
afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
int call_count = 0;
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
int child_index = 0;
- int32_t impunge_ret_child = 0;
priv = this->private;
impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = impunge_sh->active_source;
child_index = (long) cookie;
if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"setattr done for %s on %s",
impunge_local->loc.path,
priv->children[child_index]->name);
@@ -975,39 +894,117 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
strerror (op_errno));
}
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ 0, op_errno);
}
- UNLOCK (&impunge_frame->lock);
- if (call_count == 0) {
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
+ return 0;
+}
+
+int
+afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ afr_local_t *setattr_local = NULL;
+
+ setattr_local = setattr_frame->local;
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr on parent directory (%s) failed: %s",
+ setattr_local->loc.path, strerror (op_errno));
}
+ call_count = afr_frame_return (setattr_frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (setattr_frame);
return 0;
}
+int
+afr_sh_entry_impunge_setattr (call_frame_t *impunge_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *setattr_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *setattr_frame = NULL;
+ int32_t valid = 0;
+ int32_t op_errno = 0;
+ int child_index = 0;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setting ownership of %s on %s to %d/%d",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ impunge_sh->entrybuf.ia_uid,
+ impunge_sh->entrybuf.ia_gid);
+
+ setattr_frame = copy_frame (impunge_frame);
+ if (!setattr_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ AFR_LOCAL_ALLOC_OR_GOTO (setattr_frame->local, out);
+ setattr_local = setattr_frame->local;
+ call_count = afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0);
+ loc_copy (&setattr_local->loc, &impunge_sh->parent_loc);
+ impunge_local->call_count = call_count;
+ setattr_local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (impunge_sh->child_errno[i])
+ continue;
+ valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (setattr_frame,
+ afr_sh_entry_impunge_parent_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &setattr_local->loc,
+ &impunge_sh->parentbuf, valid, NULL);
+
+ valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (impunge_frame,
+ afr_sh_entry_impunge_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &impunge_local->loc,
+ &impunge_sh->entrybuf, valid, NULL);
+ call_count--;
+ }
+ GF_ASSERT (!call_count);
+ return 0;
+out:
+ if (setattr_frame)
+ AFR_STACK_DESTROY (setattr_frame);
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, op_errno);
+ return 0;
+}
int
afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
int child_index = 0;
- struct iatt stbuf = {0};
- int32_t valid = 0;
priv = this->private;
impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
child_index = (long) cookie;
@@ -1017,188 +1014,227 @@ afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
impunge_local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
+ goto out;
}
- gf_log (this->name, GF_LOG_TRACE,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_local->cont.lookup.buf.ia_uid,
- impunge_local->cont.lookup.buf.ia_gid);
-
- stbuf.ia_atime = impunge_local->cont.lookup.buf.ia_atime;
- stbuf.ia_atime_nsec = impunge_local->cont.lookup.buf.ia_atime_nsec;
- stbuf.ia_mtime = impunge_local->cont.lookup.buf.ia_mtime;
- stbuf.ia_mtime_nsec = impunge_local->cont.lookup.buf.ia_mtime_nsec;
-
- stbuf.ia_uid = impunge_local->cont.lookup.buf.ia_uid;
- stbuf.ia_gid = impunge_local->cont.lookup.buf.ia_gid;
-
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_setattr_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &impunge_local->loc,
- &stbuf, valid);
+ afr_sh_entry_impunge_setattr (impunge_frame, this);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
return 0;
}
-
int
-afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
+ xlator_t *this)
{
- loc_t *parent_loc = cookie;
+ int active_src = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr on parent directory (%s) failed: %s",
- parent_loc->path, strerror (op_errno));
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ afr_prepare_new_entry_pending_matrix (impunge_local->pending,
+ afr_is_errno_unset,
+ impunge_sh->child_errno,
+ &impunge_sh->entrybuf,
+ priv->child_count);
+ xattr = dict_new ();
+ if (!xattr) {
+ op_errno = ENOMEM;
+ goto out;
}
- loc_wipe (parent_loc);
+ afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src,
+ LOCAL_LAST);
- GF_FREE (parent_loc);
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->xattrop,
+ &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL);
- AFR_STACK_DESTROY (setattr_frame);
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
return 0;
}
-
int
afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int call_count = 0;
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
int child_index = 0;
- int32_t *pending_array = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- int idx = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- call_frame_t *setattr_frame = NULL;
- int32_t valid = 0;
- loc_t *parent_loc = NULL;
- struct iatt parentbuf = {0,};
- int32_t impunge_ret_child = 0;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = impunge_sh->active_source;
child_index = (long) cookie;
if (op_ret == -1) {
- ret = -1;
+ impunge_sh->child_errno[child_index] = op_errno;
gf_log (this->name, GF_LOG_ERROR,
"creation of %s on %s failed (%s)",
impunge_local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- goto out;
+ } else {
+ impunge_sh->child_errno[child_index] = 0;
}
- inode->ia_type = stbuf->ia_type;
-
- xattr = dict_new ();
- if (!xattr) {
- ret = -1;
- goto out;
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ if (!afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0)) {
+ // new_file creation failed every where
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
+ goto out;
+ }
+ afr_sh_entry_impunge_perform_xattrop (impunge_frame, this);
}
+out:
+ return 0;
+}
- pending_array = (int32_t*) GF_CALLOC (3, sizeof (*pending_array),
- gf_afr_mt_int32_t);
+int
+afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int call_count = 0;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
- if (!pending_array) {
- ret = -1;
- goto out;
- }
- idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
- if (IA_ISDIR (stbuf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
-
- ret = dict_set_dynptr (xattr, priv->pending_key[child_index],
- pending_array,
- 3 * sizeof (*pending_array));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- } else {
- pending_array = NULL;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ //For symlinks impunge is attempted un-conditionally
+ //So the file can already exist.
+ if ((op_ret < 0) && (op_errno == EEXIST))
+ op_ret = 0;
}
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parentbuf = impunge_sh->parentbuf;
- setattr_frame = copy_frame (impunge_frame);
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc),
- gf_afr_mt_loc_t);
- if (!parent_loc) {
- ret = -1;
- goto out;
- }
- afr_build_parent_loc (parent_loc, &impunge_local->loc);
+ return 0;
+}
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr);
+int
+afr_sh_entry_impunge_hardlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ loc_t *loc = NULL;
+ struct iatt *buf = NULL;
+ loc_t oldloc = {0};
- STACK_WIND_COOKIE (setattr_frame, afr_sh_entry_impunge_parent_setattr_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &parentbuf, valid);
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc = &impunge_local->loc;
+ buf = &impunge_sh->entrybuf;
-out:
- if (xattr)
- dict_unref (xattr);
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, buf->ia_gfid);
+ gf_log (this->name, GF_LOG_DEBUG, "linking missing file %s on %s",
+ loc->path, priv->children[child_index]->name);
- if (ret) {
- if (pending_array)
- GF_FREE (pending_array);
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_hardlink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->link,
+ &oldloc, loc, NULL);
+ loc_wipe (&oldloc);
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ return 0;
+}
- if (call_count == 0) {
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, -1,
- op_errno);
- }
+int
+afr_sh_nameless_lookup_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ if (op_ret < 0) {
+ afr_sh_entry_impunge_create_file (impunge_frame, this,
+ (long)cookie);
+ } else {
+ afr_sh_entry_impunge_hardlink (impunge_frame, this,
+ (long)cookie);
}
-
return 0;
}
+int
+afr_sh_entry_impunge_check_hardlink (call_frame_t *impunge_frame,
+ xlator_t *this,
+ int child_index, struct iatt *stbuf)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ loc_t *loc = NULL;
+ dict_t *xattr_req = NULL;
+ loc_t oldloc = {0};
+ int ret = -1;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ loc = &impunge_local->loc;
+
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ goto out;
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, stbuf->ia_gfid);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_nameless_lookup_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lookup,
+ &oldloc, xattr_req);
+ ret = 0;
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+ loc_wipe (&oldloc);
+ if (ret)
+ sh->impunge_done (frame, this, -1, ENOMEM);
+ return 0;
+}
int
afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
@@ -1221,11 +1257,41 @@ afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
if (!dict)
gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+ GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
if (ret)
gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
impunge_local->loc.path);
+ /*
+ * Reason for adding GLUSTERFS_INTERNAL_FOP_KEY :
+ *
+ * Problem:
+ * While a brick is down in a replica pair, lets say the user creates
+ * one file(file-A) and a hard link to that file(h-file-A). After the
+ * brick comes back up, entry self-heal is attempted on parent dir of
+ * these two files. As part of readdir in self-heal it reads both the
+ * entries file-A and h-file-A for both of them it does name less lookup
+ * to check if there are any hardlinks already present in the
+ * destination brick. It finds that there are no hard links already
+ * present for files file-A, h-file-A. Self-heal does mknods for both
+ * file-A and h-file-A. This leads to file-A and h-file-A not being
+ * hardlinks anymore.
+ *
+ * Fix: (More like shrinking of race-window, the race itself is still
+ * present in posix-mknod).
+ * If mknod comes with the presence of GLUSTERFS_INTERNAL_FOP_KEY then
+ * posix_mknod checks if there are already any gfid-links and does
+ * link() instead of mknod. There still can be a race where two
+ * posix_mknods same gfid see that
+ * gfid-link file is not present and proceeds with mknods and result in
+ * two different files with same gfid.
+ */
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: %s set failed",
+ impunge_local->loc.path, GLUSTERFS_INTERNAL_FOP_KEY);
+
STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
(void *) (long) child_index,
priv->children[child_index],
@@ -1233,7 +1299,7 @@ afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
&impunge_local->loc,
st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
makedev (ia_major (stbuf->ia_rdev),
- ia_minor (stbuf->ia_rdev)), dict);
+ ia_minor (stbuf->ia_rdev)), 0, dict);
if (dict)
dict_unref (dict);
@@ -1263,6 +1329,7 @@ afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
return 0;
}
+ GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
if (ret)
gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
@@ -1279,7 +1346,7 @@ afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
priv->children[child_index]->fops->mkdir,
&impunge_local->loc,
st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- dict);
+ 0, dict);
if (dict)
dict_unref (dict);
@@ -1297,32 +1364,20 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
dict_t *dict = NULL;
struct iatt *buf = NULL;
int ret = 0;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int32_t impunge_ret_child = 0;
priv = this->private;
impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- buf = &impunge_local->cont.symlink.buf;
+ buf = &impunge_local->cont.dir_fop.buf;
dict = dict_new ();
if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (impunge_frame, this, impunge_ret_child, -1,
- ENOMEM);
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, ENOMEM);
goto out;
}
+ GF_ASSERT (!uuid_is_null (buf->ia_gfid));
ret = afr_set_dict_gfid (dict, buf->ia_gfid);
if (ret)
gf_log (this->name, GF_LOG_INFO,
@@ -1338,7 +1393,7 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc, dict);
+ linkname, &impunge_local->loc, 0, dict);
if (dict)
dict_unref (dict);
@@ -1352,26 +1407,17 @@ afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
int child_index = -1;
- call_frame_t *frame = NULL;
int call_count = -1;
- int active_src = -1;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int32_t impunge_ret_child = 0;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = impunge_sh->active_source;
child_index = (long) cookie;
@@ -1395,12 +1441,9 @@ out:
}
UNLOCK (&impunge_frame->lock);
- if (call_count == 0) {
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
- }
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
return 0;
}
@@ -1425,7 +1468,7 @@ afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->unlink,
- &impunge_local->loc);
+ &impunge_local->loc, 0, NULL);
return 0;
}
@@ -1435,25 +1478,18 @@ int
afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
int child_index = -1;
- call_frame_t *frame = NULL;
int call_count = -1;
int active_src = -1;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int32_t impunge_ret_child = 0;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
active_src = impunge_sh->active_source;
child_index = (long) cookie;
@@ -1498,12 +1534,9 @@ out:
}
UNLOCK (&impunge_frame->lock);
- if (call_count == 0) {
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
- }
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
return 0;
}
@@ -1527,7 +1560,7 @@ afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->readlink,
- &impunge_local->loc, 4096);
+ &impunge_local->loc, 4096, NULL);
return 0;
}
@@ -1537,25 +1570,18 @@ int
afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
int child_index = -1;
- call_frame_t *frame = NULL;
int call_count = -1;
int active_src = -1;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int32_t impunge_ret_child = 0;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
active_src = impunge_sh->active_source;
child_index = (long) cookie;
@@ -1581,12 +1607,9 @@ out:
}
UNLOCK (&impunge_frame->lock);
- if (call_count == 0) {
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
- }
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
return 0;
}
@@ -1605,36 +1628,38 @@ afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
active_src = impunge_sh->active_source;
- impunge_local->cont.symlink.buf = *stbuf;
+ impunge_local->cont.dir_fop.buf = *stbuf;
STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
(void *) (long) child_index,
priv->children[active_src],
priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096);
+ &impunge_local->loc, 4096, NULL);
return 0;
}
int
afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent)
+ int child_index)
{
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
ia_type_t type = IA_INVAL;
- int ret = 0;
int active_src = 0;
+ struct iatt *buf = NULL;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->parentbuf = *postparent;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
active_src = impunge_sh->active_source;
- impunge_local->cont.lookup.buf = *buf;
- afr_update_loc_gfids (&impunge_local->loc, buf, postparent);
+ afr_update_loc_gfids (&impunge_local->loc, &impunge_sh->entrybuf,
+ &impunge_sh->parentbuf);
+ buf = &impunge_sh->entrybuf;
type = buf->ia_type;
switch (type) {
@@ -1643,12 +1668,9 @@ afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
case IA_IFBLK:
case IA_IFCHR:
case IA_IFIFO:
- afr_sh_entry_impunge_mknod (impunge_frame, this,
- child_index, buf);
- break;
case IA_IFLNK:
- afr_sh_entry_impunge_readlink (impunge_frame, this,
- child_index, buf);
+ afr_sh_entry_impunge_check_hardlink (impunge_frame, this,
+ child_index, buf);
break;
case IA_IFDIR:
afr_sh_entry_impunge_mkdir (impunge_frame, this,
@@ -1659,288 +1681,270 @@ afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
"%s has unknown file type on %s: 0%o",
impunge_local->loc.path,
priv->children[active_src]->name, type);
- ret = -1;
+ sh->impunge_done (frame, this, -1, EINVAL);
break;
}
- return ret;
+ return 0;
}
int
-afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr,struct iatt *postparent)
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = 0;
- int child_index = 0;
- call_frame_t *frame = NULL;
- int call_count = 0;
- int ret = 0;
- int32_t impunge_ret_child = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
-
- child_index = (long) cookie;
+ ia_type_t type = IA_INVAL;
+ int active_src = 0;
+ struct iatt *buf = NULL;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
active_src = impunge_sh->active_source;
+ buf = &impunge_sh->entrybuf;
+ type = buf->ia_type;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s on %s (for %s) failed (%s)",
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ afr_sh_entry_impunge_mknod (impunge_frame, this,
+ child_index, buf);
+ break;
+ case IA_IFLNK:
+ afr_sh_entry_impunge_readlink (impunge_frame, this,
+ child_index, buf);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
impunge_local->loc.path,
- priv->children[active_src]->name,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
+ priv->children[active_src]->name, type);
+ sh->impunge_done (frame, this, -1, EINVAL);
+ break;
}
- ret = afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf,
- postparent);
- if (ret)
+ return 0;
+}
+
+gf_boolean_t
+afr_sh_need_recreate (afr_self_heal_t *impunge_sh, unsigned int child,
+ unsigned int child_count)
+{
+ gf_boolean_t recreate = _gf_false;
+
+ GF_ASSERT (impunge_sh->child_errno);
+
+ if (child == impunge_sh->active_source)
goto out;
- return 0;
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ recreate = _gf_true;
+ goto out;
+ }
+ if (impunge_sh->child_errno[child] == ENOENT)
+ recreate = _gf_true;
out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ return recreate;
+}
- if (call_count == 0) {
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
+unsigned int
+afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
+ unsigned int child_count)
+{
+ int count = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (afr_sh_need_recreate (impunge_sh, i, child_count))
+ count++;
}
- return 0;
+ return count;
}
-
int
-afr_sh_entry_impunge_recreate (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
+ xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ unsigned int recreate_count = 0;
+ int i = 0;
int active_src = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
active_src = impunge_sh->active_source;
-
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_recreate_lookup_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &impunge_local->loc, 0);
-
+ impunge_sh->entrybuf = impunge_sh->buf[active_src];
+ impunge_sh->parentbuf = impunge_sh->parentbufs[active_src];
+ recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
+ priv->child_count);
+ if (!recreate_count) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, 0);
+ goto out;
+ }
+ impunge_local->call_count = recreate_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count))
+ continue;
+ (void)afr_sh_entry_impunge_create (impunge_frame, this, i);
+ recreate_count--;
+ }
+ GF_ASSERT (!recreate_count);
+out:
return 0;
}
-
-int
-afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
+void
+afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
- int call_count = 0;
- int child_index = 0;
call_frame_t *frame = NULL;
- int active_src = 0;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- int32_t impunge_ret_child = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- child_index = (long) cookie;
- active_src = impunge_sh->active_source;
-
- if ((op_ret == -1 && op_errno == ENOENT)
- || (IA_ISLNK (impunge_sh->impunging_entry_mode))) {
-
- /*
- * A symlink's target might have changed, so
- * always go down the recreate path for them.
- */
-
- /* decrease call_count in recreate-callback */
-
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- afr_sh_entry_impunge_recreate (impunge_frame, this,
- child_index);
- return 0;
- }
+ unsigned int gfid_miss_count = 0;
+ unsigned int children_up_count = 0;
+ uuid_t gfid = {0};
+ int active_src = 0;
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
- impunge_sh->parentbuf = *postparent;
+ if (op_ret < 0)
+ goto done;
+ if (impunge_sh->child_errno[active_src]) {
+ op_ret = -1;
+ op_errno = impunge_sh->child_errno[active_src];
+ goto done;
+ }
+
+ gfid_miss_count = afr_gfid_missing_count (this->name,
+ impunge_sh->success_children,
+ impunge_sh->buf, priv->child_count,
+ impunge_local->loc.path);
+ children_up_count = afr_up_children_count (impunge_local->child_up,
+ priv->child_count);
+ if ((gfid_miss_count == children_up_count) &&
+ (children_up_count < priv->child_count)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "Not all children are up, "
+ "gfid should not be assigned in this state for %s",
+ impunge_local->loc.path);
+ goto done;
+ }
+
+ if (gfid_miss_count) {
+ afr_update_gfid_from_iatts (gfid, impunge_sh->buf,
+ impunge_sh->success_children,
+ priv->child_count);
+ if (uuid_is_null (gfid)) {
+ sh->entries_skipped = _gf_true;
+ gf_log (this->name, GF_LOG_INFO, "%s: Skipping entry "
+ "self-heal because of gfid absence",
+ impunge_local->loc.path);
+ goto done;
+ }
+ afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
+ afr_sh_entry_common_lookup_done, gfid,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
} else {
- gf_log (this->name, GF_LOG_WARNING,
- "looking up %s under %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ afr_sh_entry_call_impunge_recreate (impunge_frame, this);
}
-
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
- }
-
- return 0;
+ return;
+done:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
+ return;
}
-
int
afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
gf_dirent_t *entry)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
int ret = -1;
call_frame_t *impunge_frame = NULL;
afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
int active_src = 0;
- int i = 0;
- int call_count = 0;
int op_errno = 0;
int op_ret = -1;
- priv = this->private;
local = frame->local;
sh = &local->self_heal;
active_src = sh->active_source;
sh->impunge_done = afr_sh_entry_impunge_entry_done;
- if ((strcmp (entry->d_name, ".") == 0)
- || (strcmp (entry->d_name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- entry->d_name, local->loc.path);
+ if (can_skip_entry_self_heal (entry->d_name, &local->loc)) {
op_ret = 0;
goto out;
}
gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
+ "inspecting existence of %s under %s",
entry->d_name, local->loc.path);
- impunge_frame = copy_frame (frame);
- if (!impunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
+ ret = afr_impunge_frame_create (frame, this, active_src,
+ &impunge_frame);
+ if (ret) {
+ op_errno = -ret;
goto out;
}
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
-
- impunge_frame->local = impunge_local;
+ impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_src;
- impunge_sh->impunge_ret_child = active_src;
-
- impunge_sh->impunging_entry_mode =
- st_mode_from_ia (entry->d_stat.ia_prot, entry->d_stat.ia_type);
-
- ret = build_child_loc (this, &impunge_local->loc, &local->loc, entry->d_name);
+ ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
+ entry->d_name);
+ loc_copy (&impunge_sh->parent_loc, &local->loc);
if (ret != 0) {
op_errno = ENOMEM;
goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
- call_count++;
- }
-
- impunge_local->call_count = call_count;
+ afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
+ afr_sh_entry_common_lookup_done, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS, NULL);
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", impunge_local->loc.path,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_entry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &impunge_local->loc, 0);
-
- if (!--call_count)
- break;
- }
-
- ret = 0;
+ op_ret = 0;
out:
- if (ret == -1)
- sh->impunge_done (frame, this, active_src, op_ret, op_errno);
+ if (ret) {
+ if (impunge_frame)
+ AFR_STACK_DESTROY (impunge_frame);
+ sh->impunge_done (frame, this, op_ret, op_errno);
+ }
return 0;
}
@@ -1950,7 +1954,7 @@ int
afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -1973,6 +1977,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
local->loc.path,
priv->children[active_src]->name,
strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"readdir of %s on subvolume %s complete",
@@ -1989,7 +1994,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
entry_count++;
}
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"readdir'ed %d entries from %s",
entry_count, priv->children[active_src]->name);
@@ -2005,21 +2010,24 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
+ int32_t active_src = 0;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
+ active_src = sh->active_source;
+ gf_log (this->name, GF_LOG_DEBUG, "%s: readdir from offset %zd",
+ local->loc.path, sh->offset);
STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
priv->children[active_src],
priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
return 0;
}
@@ -2042,7 +2050,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_source (frame, this, sh->active_source);
sh->active_source = active_src;
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -2057,7 +2065,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
"impunging entries of %s on %s to other sinks",
local->loc.path, priv->children[active_src]->name);
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ afr_sh_entry_impunge_subvol (frame, this);
return 0;
}
@@ -2065,7 +2073,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
int
afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -2091,7 +2099,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
}
UNLOCK (&frame->lock);
@@ -2099,7 +2107,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -2137,7 +2145,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
source = local->self_heal.source;
sources = local->self_heal.sources;
- sh->block_size = 65536; //131072
+ sh->block_size = priv->sh_readdir_size;
sh->offset = 0;
call_count = sh->active_sinks;
@@ -2159,7 +2167,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
(void *) (long) source,
priv->children[source],
priv->children[source]->fops->opendir,
- &local->loc, fd);
+ &local->loc, fd, NULL);
call_count--;
}
@@ -2176,7 +2184,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- &local->loc, fd);
+ &local->loc, fd, NULL);
if (!--call_count)
break;
@@ -2192,9 +2200,7 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
int source = 0;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -2202,63 +2208,67 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
source = sh->source;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
+ afr_sh_mark_source_sinks (frame, this);
if (source != -1)
sh->success[source] = 1;
- if (active_sinks == 0) {
+ if (sh->active_sinks == 0) {
gf_log (this->name, GF_LOG_TRACE,
"no active sinks for self-heal on dir %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
return 0;
}
- if (source == -1 && active_sinks < 2) {
+ if (source == -1 && sh->active_sinks < 2) {
gf_log (this->name, GF_LOG_TRACE,
"cannot sync with 0 sources and 1 sink on dir %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
return 0;
}
- sh->active_sinks = active_sinks;
if (source != -1)
gf_log (this->name, GF_LOG_DEBUG,
"self-healing directory %s from subvolume %s to "
"%d other",
local->loc.path, priv->children[source]->name,
- active_sinks);
+ sh->active_sinks);
else
gf_log (this->name, GF_LOG_DEBUG,
"no active sources for %s found. "
"merging all entries as a conservative decision",
local->loc.path);
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
afr_sh_entry_open (frame, this);
return 0;
}
-int
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int source = 0;
-
- int nsources = 0;
+ int nsources = 0;
+ int32_t subvol_status = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_entry_finish (frame, this);
+ goto out;
+ }
+
if (sh->forced_merge) {
sh->source = -1;
goto heal;
@@ -2267,68 +2277,39 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->success_children,
- AFR_ENTRY_TRANSACTION);
- if (nsources == 0) {
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", local->loc.path);
+ source = -1;
+ memset (sh->sources, 0,
+ sizeof (*sh->sources) * priv->child_count);
+ } else if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
"No self-heal needed for %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
- return 0;
+ return;
+ } else {
+ source = afr_sh_select_source (sh->sources, priv->child_count);
}
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
sh->source = source;
afr_reset_children (sh->fresh_children, priv->child_count);
afr_get_fresh_children (sh->success_children, sh->sources,
sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-
+ if (sh->source >= 0)
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
heal:
afr_sh_entry_sync_prepare (frame, this);
-
- return 0;
-}
-
-
-
-int
-afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- sh->xattr[child_index] = dict_ref (xattr);
- sh->buf[child_index] = *buf;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_entry_fix (frame, this);
- }
-
- return 0;
+out:
+ return;
}
int
@@ -2345,14 +2326,17 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
"failed for %s.", local->loc.path);
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_entry_done (frame, this);
} else {
gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
"for %s. Proceeding to FOP", local->loc.path);
afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_entry_lookup_cbk, _gf_false);
+ afr_sh_entry_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
}
return 0;
@@ -2361,14 +2345,18 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
+ afr_self_heal_t *sh = NULL;
priv = this->private;
local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
- if (local->self_heal.need_entry_self_heal && priv->entry_self_heal) {
+ if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_entrylk (frame, this, &local->loc, NULL,
afr_sh_post_nonblocking_entry_cbk);
} else {
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index bacf27588..fd5da6cfd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -54,70 +45,26 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
- memset (sh->success, 0, sizeof (int) * priv->child_count);
-/* for (i = 0; i < priv->child_count; i++) { */
-/* sh->locked_nodes[i] = 1; */
-/* } */
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_INFO,
- "split-brain detected, aborting selfheal of %s",
+ afr_sh_reset (frame, this);
+ if (IA_ISDIR (sh->type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "proceeding to entry check on %s",
local->loc.path);
- sh->op_failed = 1;
- sh->completion_cbk (frame, this);
+ afr_self_heal_entry (frame, this);
} else {
- if (IA_ISREG (sh->type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- return 0;
- }
-
- if (IA_ISDIR (sh->type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
- sh->completion_cbk (frame, this);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "proceeding to data check on %s",
+ local->loc.path);
+ afr_self_heal_data (frame, this);
}
return 0;
}
-
-int
-afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_done (frame, this);
-
- return 0;
-}
-
int
afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this)
{
@@ -141,11 +88,24 @@ afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
return 0;
}
+/*
+ * Flag the current self-heal as failed and then run the regular metadata
+ * finish path. Always returns 0.
+ */
+int
+afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *heal_local = frame->local;
+
+        afr_set_self_heal_status (&heal_local->self_heal,
+                                  AFR_SELF_HEAL_FAILED);
+        afr_sh_metadata_finish (frame, this);
+
+        return 0;
+}
int
afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = 0;
@@ -160,8 +120,8 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
(!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_fresh_children_add_child (sh->fresh_children, i,
- priv->child_count);
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
}
call_count = afr_frame_return (frame);
@@ -177,85 +137,19 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
return 0;
}
-
int
afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
- if (!erase_xattr)
- return -ENOMEM;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- return 0;
+ afr_sh_erase_pending (frame, this, AFR_METADATA_TRANSACTION,
+ afr_sh_metadata_erase_pending_cbk,
+ afr_sh_metadata_finish);
+ return 0;
}
int
afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -286,8 +180,13 @@ afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
- if (call_count == 0)
+ if (call_count == 0) {
+ if (local->xattr_req) {
+ dict_unref (local->xattr_req);
+ local->xattr_req = NULL;
+ }
afr_sh_metadata_erase_pending (frame, this);
+ }
return 0;
}
@@ -296,9 +195,9 @@ afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
@@ -306,13 +205,93 @@ afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
+/*
+ * Callback for the removexattr wound from afr_sh_getxattr_cbk.
+ *
+ * A failed removexattr is reported to afr_sh_metadata_sync_cbk with
+ * op_ret forced to -1. On success, setxattr with local->xattr_req is wound
+ * to the same child; the child index travels in 'cookie'.
+ * Always returns 0.
+ */
+int
+afr_sh_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno,
+                        dict_t *xdata)
+{
+        afr_private_t *priv  = this->private;
+        afr_local_t   *local = frame->local;
+        int            i     = 0;
+
+        if (op_ret < 0) {
+                afr_sh_metadata_sync_cbk (frame, cookie,
+                                          this, -1, op_errno, xdata);
+                return 0;
+        }
+
+        i = (long) cookie;
+
+        STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+                           (void *) (long) i,
+                           priv->children[i],
+                           priv->children[i]->fops->setxattr,
+                           &local->loc, local->xattr_req, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * Drop the GF_SELINUX_XATTR_KEY entry from xattr_dict, if present.
+ */
+inline void
+afr_prune_special_keys (dict_t *xattr_dict)
+{
+        dict_del (xattr_dict,
+                  GF_SELINUX_XATTR_KEY);
+}
+
+/*
+ * Remove every per-child pending key (priv->pending_key[0..child_count-1])
+ * from xattr_dict.
+ */
+inline void
+afr_prune_pending_keys (dict_t *xattr_dict, afr_private_t *priv)
+{
+        int child = 0;
+
+        while (child < priv->child_count) {
+                dict_del (xattr_dict, priv->pending_key[child]);
+                child++;
+        }
+}
+
+/*
+ * Callback for the per-sink getxattr wound from afr_sh_metadata_sync.
+ *
+ * A failed getxattr is reported to afr_sh_metadata_sync_cbk with op_ret
+ * forced to -1. Otherwise the pending keys and the special (SELinux) key
+ * are pruned from the returned dict, and the remainder is handed as xdata
+ * to a removexattr on the same child (index carried in 'cookie').
+ * Always returns 0.
+ */
+int
+afr_sh_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xattr,
+                     dict_t *xdata)
+{
+        afr_private_t *priv  = this->private;
+        afr_local_t   *local = frame->local;
+        int            i     = 0;
+
+        if (op_ret < 0) {
+                afr_sh_metadata_sync_cbk (frame, cookie,
+                                          this, -1, op_errno, xdata);
+                return 0;
+        }
+
+        afr_prune_pending_keys (xattr, priv);
+        afr_prune_special_keys (xattr);
+
+        i = (long) cookie;
+
+        /* send removexattr in bulk via xdata */
+        STACK_WIND_COOKIE (frame, afr_sh_removexattr_cbk,
+                           cookie,
+                           priv->children[i],
+                           priv->children[i]->fops->removexattr,
+                           &local->loc, "", xattr);
+
+        return 0;
+}
int
afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
@@ -338,9 +317,10 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
/*
* 2 calls per sink - setattr, setxattr
*/
- if (xattr)
+ if (xattr) {
call_count = active_sinks * 2;
- else
+ local->xattr_req = dict_ref (xattr);
+ } else
call_count = active_sinks;
local->call_count = call_count;
@@ -376,18 +356,18 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+ &local->loc, &stbuf, valid, NULL);
call_count--;
if (!xattr)
continue;
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+ STACK_WIND_COOKIE (frame, afr_sh_getxattr_cbk,
(void *) (long) i,
priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0);
+ priv->children[i]->fops->getxattr,
+ &local->loc, NULL, NULL);
call_count--;
}
@@ -396,17 +376,15 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int source = 0;
- int i;
-
local = frame->local;
sh = &local->self_heal;
priv = this->private;
@@ -421,16 +399,147 @@ afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
afr_sh_metadata_sync (frame, this, NULL);
} else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
-
+ afr_prune_pending_keys (xattr, priv);
afr_sh_metadata_sync (frame, this, xattr);
}
return 0;
}
+/*
+ * Append "<prefix><name><suffix>" at offset 'off' of 'buf' (capacity
+ * 'size'). When 'buf' is NULL, or 'off' is already past the capacity,
+ * nothing is written and only the length is measured.
+ *
+ * Returns the offset after the fragment, i.e. the length the text would
+ * have with unlimited space.
+ */
+static size_t
+afr_metadata_sh_info_append (char *buf, size_t size, size_t off,
+                             const char *prefix, const char *name,
+                             const char *suffix)
+{
+        int written = 0;
+
+        if (buf && (off < size))
+                written = snprintf (buf + off, size - off, "%s%s%s",
+                                    prefix, name, suffix);
+        else
+                written = snprintf (NULL, 0, "%s%s%s",
+                                    prefix, name, suffix);
+
+        return (written > 0) ? (off + (size_t) written) : off;
+}
+
+/*
+ * Produce the " metadata self heal from source X to Y, ..." description.
+ *
+ * The same routine is used to size the text (buf == NULL, size == 0) and
+ * to write it, so the measured length always matches what gets written.
+ * Returns the length of the text, excluding the terminating NUL.
+ */
+static size_t
+afr_metadata_sh_info_build (char *buf, size_t size, afr_local_t *local,
+                            afr_self_heal_t *sh, afr_private_t *priv)
+{
+        size_t off           = 0;
+        int    down_count    = 0;
+        int    unknown_count = 0;
+        int    i             = 0;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i] == 0)
+                        down_count++;
+                else if (local->child_up[i] == -1)
+                        unknown_count++;
+        }
+
+        off = afr_metadata_sh_info_append (buf, size, off,
+                                           " metadata self heal", "", "");
+
+        /* the source, if it is up */
+        for (i = 0; i < priv->child_count; i++) {
+                if ((sh->source == i) && (local->child_up[i] == 1))
+                        off = afr_metadata_sh_info_append (buf, size, off,
+                                                " from source ",
+                                                priv->children[i]->name,
+                                                " to");
+        }
+
+        /* the sinks that are up */
+        for (i = 0; i < priv->child_count; i++) {
+                if ((local->child_up[i] == 1) && (sh->sources[i] == 0))
+                        off = afr_metadata_sh_info_append (buf, size, off,
+                                                " ",
+                                                priv->children[i]->name,
+                                                ", ");
+        }
+
+        if (down_count > 0) {
+                off = afr_metadata_sh_info_append (buf, size, off,
+                                (down_count > 1) ? " down subvolumes are "
+                                                 : " down subvolume is ",
+                                "", "");
+                for (i = 0; i < priv->child_count; i++) {
+                        if (local->child_up[i] == 0)
+                                off = afr_metadata_sh_info_append (buf, size,
+                                                off, " ",
+                                                priv->children[i]->name,
+                                                ", ");
+                }
+        }
+
+        if (unknown_count > 0) {
+                off = afr_metadata_sh_info_append (buf, size, off,
+                                (unknown_count > 1)
+                                        ? " unknown subvolumes are "
+                                        : " unknown subvolume is",
+                                "", "");
+                for (i = 0; i < priv->child_count; i++) {
+                        if (local->child_up[i] == -1)
+                                off = afr_metadata_sh_info_append (buf, size,
+                                                off, " ",
+                                                priv->children[i]->name,
+                                                ", ");
+                }
+        }
+
+        return off;
+}
+
+/*
+ * Build sh->metadata_sh_info: a description of the metadata self-heal
+ * (source, sinks, down and unknown subvolumes) followed by the pending
+ * matrix string.
+ *
+ * local - supplies child_up[] for every subvolume
+ * sh    - supplies source, sources[] and pending_matrix; receives
+ *         metadata_sh_info
+ * this  - xlator whose private data holds the children
+ *
+ * On allocation failure sh->metadata_sh_info is left untouched. Both
+ * temporary strings are released on every path.
+ */
+static void
+afr_set_metadata_sh_info_str (afr_local_t *local, afr_self_heal_t *sh,
+                              xlator_t *this)
+{
+        afr_private_t *priv       = this->private;
+        char          *matrix_str = NULL;
+        char          *string     = NULL;
+        size_t         len        = 0;
+
+        /* may be NULL; an empty string is substituted when printing */
+        matrix_str = afr_get_pending_matrix_str (sh->pending_matrix, this);
+
+        len = afr_metadata_sh_info_build (NULL, 0, local, sh, priv) + 1;
+
+        string = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+        if (!string)
+                goto out;
+
+        afr_metadata_sh_info_build (string, len, local, sh, priv);
+
+        gf_asprintf (&sh->metadata_sh_info, "%s metadata %s,", string,
+                     matrix_str ? matrix_str : "");
+out:
+        /* ownership is tracked by pointer, never by string contents */
+        if (string)
+                GF_FREE (string);
+        if (matrix_str)
+                GF_FREE (matrix_str);
+}
int
afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
@@ -438,9 +547,7 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
int source = 0;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -448,38 +555,35 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
source = sh->source;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
gf_log (this->name, GF_LOG_DEBUG,
"no active sinks for performing self-heal on file %s",
local->loc.path);
afr_sh_metadata_finish (frame, this);
return 0;
}
- sh->active_sinks = active_sinks;
gf_log (this->name, GF_LOG_TRACE,
"syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name, active_sinks);
+ local->loc.path, priv->children[source]->name,
+ sh->active_sinks);
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_set_metadata_sh_info_str (local, sh, this);
STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
priv->children[source],
priv->children[source]->fops->getxattr,
- &local->loc, NULL);
+ &local->loc, NULL, NULL);
return 0;
}
-int
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -492,19 +596,16 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_metadata_finish (frame, this);
+ goto out;
+ }
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->success_children,
- AFR_METADATA_TRANSACTION);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
-
+ AFR_METADATA_TRANSACTION, NULL, _gf_false);
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
@@ -521,15 +622,21 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
}
if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, SPB, DONT_KNOW);
+ afr_sh_metadata_fail (frame, this);
+ goto out;
+ }
- local->govinda_gOvinda = 1;
+ afr_set_split_brain (this, sh->inode, NO_SPB, DONT_KNOW);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
afr_sh_metadata_finish (frame, this);
- return 0;
+ goto out;
}
source = afr_sh_select_source (sh->sources, priv->child_count);
@@ -539,7 +646,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
"No active sources found.");
afr_sh_metadata_finish (frame, this);
- return 0;
+ goto out;
}
sh->source = source;
@@ -558,71 +665,19 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
if ((!IA_ISREG (sh->buf[source].ia_type)) &&
(!IA_ISDIR (sh->buf[source].ia_type))) {
- afr_reset_children (sh->fresh_children,
- priv->child_count);
+ afr_reset_children (sh->fresh_children, priv->child_count);
afr_get_fresh_children (sh->success_children, sh->sources,
sh->fresh_children, priv->child_count);
afr_inode_set_read_ctx (this, sh->inode, sh->source,
sh->fresh_children);
}
- afr_sh_metadata_sync_prepare (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->ia_type);
-
- sh->buf[child_index] = *buf;
- if (xattr)
- sh->xattr[child_index] = dict_ref (xattr);
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_fix (frame, this);
-
- return 0;
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ afr_sh_metadata_sync_prepare (frame, this);
+ else
+ afr_sh_metadata_finish (frame, this);
+out:
+ return;
}
int
@@ -636,9 +691,9 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
int_lock = &local->internal_lock;
if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking metadata "
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
"inodelks failed for %s.", local->loc.path);
- gf_log (this->name, GF_LOG_ERROR, "Metadata self-heal "
+ gf_log (this->name, GF_LOG_DEBUG, "Metadata self-heal "
"failed for %s.", local->loc.path);
afr_sh_metadata_done (frame, this);
} else {
@@ -647,7 +702,10 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
"inodelks done for %s. Proceeding to FOP",
local->loc.path);
afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_metadata_lookup_cbk, _gf_false);
+ afr_sh_metadata_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
}
return 0;
@@ -657,19 +715,22 @@ int
afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
local = frame->local;
int_lock = &local->internal_lock;
+ int_lock->domain = this->name;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = 0;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
+ inodelk->flock.l_start = LLONG_MAX - 1;
+ inodelk->flock.l_len = 0;
+ inodelk->flock.l_type = F_WRLCK;
int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
afr_nonblocking_inodelk (frame, this);
@@ -677,17 +738,29 @@ afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
return 0;
}
+/*
+ * Decide whether metadata self-heal should take its lock and proceed.
+ *
+ * True when sh->force_confirm_spb is set, or when this heal wants metadata
+ * self-heal (sh->do_metadata_self_heal) and the volume has it enabled
+ * (priv->metadata_self_heal).
+ */
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+        if (sh->force_confirm_spb ||
+            (sh->do_metadata_self_heal && priv->metadata_self_heal))
+                return _gf_true;
+
+        return _gf_false;
+}
int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = this->private;
-
+ afr_self_heal_t *sh = &local->self_heal;
local = frame->local;
+ sh = &local->self_heal;
+ sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
- if (local->self_heal.need_metadata_self_heal && priv->metadata_self_heal) {
+ if (afr_can_start_metadata_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_metadata_lock (frame, this);
} else {
afr_sh_metadata_done (frame, this);
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index f40c06faa..7c9bc8111 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_H__
@@ -30,13 +21,6 @@
#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this);
-int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this);
-int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this);
-
-int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
int
@@ -54,5 +38,6 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
int
afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
dict_t **xattr,
- afr_transaction_type txn_type);
+ afr_transaction_type txn_type,
+ uuid_t gfid);
#endif /* __AFR_SELF_HEAL_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
new file mode 100644
index 000000000..1b48a1bca
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -0,0 +1,1787 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include "afr.h"
+#include "syncop.h"
+#include "afr-self-heald.h"
+#include "afr-self-heal-common.h"
+#include "protocol-common.h"
+#include "event-history.h"
+
+/* Crawl flags; presumably passed as the crawl_flags argument of
+ * afr_start_crawl -- TODO confirm against callers. */
+typedef enum {
+ STOP_CRAWL_ON_SINGLE_SUBVOL = 1
+} afr_crawl_flags_t;
+
+/* Operation selector for a self-heal daemon crawl. NOTE(review): usage is
+ * not visible in this part of the file. */
+typedef enum {
+ HEAL = 1,
+ INFO,
+ STATISTICS_TO_BE_HEALED,
+} shd_crawl_op;
+
+/* Context handed to eh_dump() callbacks: the destination dict, the xlator
+ * doing the dump, and the child index the dump is for. */
+typedef struct shd_dump {
+ dict_t *dict;
+ xlator_t *this;
+ int child;
+} shd_dump_t;
+
+/* One event-history record: the child it belongs to and a path string
+ * that the cleanup helpers release with GF_FREE. */
+typedef struct shd_event_ {
+ int child;
+ char *path;
+} shd_event_t;
+
+/* Child index, xlator and child position bundled together; presumably the
+ * argument of afr_syncop_find_child_position -- TODO confirm. */
+typedef struct shd_pos_ {
+ int child;
+ xlator_t *this;
+ afr_child_pos_t pos;
+} shd_pos_t;
+
+typedef int
+(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done);
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+
+/* For calling straight through (e.g. already in a synctask). */
+int
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos);
+
+/* For deferring through a new synctask. */
+int
+afr_syncop_find_child_position (void *data);
+
+/*
+ * Fill loc->path.
+ *
+ * If loc->inode carries a non-null gfid the path comes from inode_path()
+ * and its return value is passed through. Otherwise, if loc->gfid is
+ * non-null, the path becomes an allocated "<gfid:UUID>" string.
+ *
+ * Returns inode_path()'s result in the first case, 0 when the gfid string
+ * was allocated, and -1 when no gfid is available or allocation failed.
+ */
+static int
+_loc_assign_gfid_path (loc_t *loc)
+{
+        char gfid_path[64] = {0};
+
+        if (loc->inode && !uuid_is_null (loc->inode->gfid))
+                return inode_path (loc->inode, NULL, (char**)&loc->path);
+
+        if (uuid_is_null (loc->gfid))
+                return -1;
+
+        snprintf (gfid_path, sizeof (gfid_path), "<gfid:%s>",
+                  uuid_utoa (loc->gfid));
+        loc->path = gf_strdup (gfid_path);
+
+        return loc->path ? 0 : -1;
+}
+
+/*
+ * Release the time strings owned by a shd_crawl_event_t. The event
+ * structure itself is not freed here. A NULL 'data' is ignored.
+ */
+void
+_destroy_crawl_event_data (void *data)
+{
+        shd_crawl_event_t *event = (shd_crawl_event_t *)data;
+
+        if (event) {
+                GF_FREE (event->start_time_str);
+                GF_FREE (event->end_time_str);
+        }
+}
+
+/*
+ * Release the path owned by a shd_event_t. The event structure itself is
+ * not freed here. A NULL 'data' is ignored.
+ */
+void
+_destroy_shd_event_data (void *data)
+{
+        shd_event_t *shd_event = (shd_event_t*)data;
+
+        if (shd_event) {
+                GF_FREE (shd_event->path);
+        }
+}
+/*
+ * Free a shd_event_t together with the path it owns. A NULL 'event' is
+ * ignored.
+ */
+void
+shd_cleanup_event (void *event)
+{
+        shd_event_t *doomed = event;
+
+        if (doomed) {
+                GF_FREE (doomed->path);
+                GF_FREE (doomed);
+        }
+}
+
+/*
+ * Return the index of the first child whose position is AFR_POS_LOCAL,
+ * or -1 when none of the first child_count entries is local.
+ */
+int
+afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+{
+        int child = 0;
+
+        for (child = 0; child < child_count; child++) {
+                if (shd->pos[child] == AFR_POS_LOCAL)
+                        return child;
+        }
+
+        return -1;
+}
+
+/*
+ * Prepare 'loc' to address entry 'name' under 'parent'.
+ *
+ * Sets pargfid from the parent inode, an empty static path, the name, and
+ * a new reference on the parent inode. If that reference cannot be taken,
+ * the static path is cleared before loc_wipe() and -1 is returned.
+ * Returns 0 on success.
+ */
+static int
+_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+{
+        uuid_copy (loc->pargfid, parent->inode->gfid);
+        loc->path = "";
+        loc->name = name;
+        loc->parent = inode_ref (parent->inode);
+
+        if (loc->parent)
+                return 0;
+
+        /* path is a string literal: detach it before loc_wipe */
+        loc->path = NULL;
+        loc_wipe (loc);
+
+        return -1;
+}
+
+/*
+ * Publish the statistics of one crawl event into 'output'.
+ *
+ * The xlator id is read from 'output' under this->name. The per-child
+ * running index is read from "statistics-<id>-<child>-count"; if that key
+ * is absent the index stays 0. Each statistic is stored under a key
+ * suffixed with that index, and the counter is then written back
+ * incremented by one.
+ *
+ * this      - xlator doing the dump
+ * output    - destination dictionary
+ * child     - child index the statistics belong to
+ * shd_event - crawl event to publish; must carry a start time string
+ * tv        - unused
+ *
+ * Returns 0 on success, -1 if the event has no start time, otherwise the
+ * non-zero result of the dict call that failed.
+ */
+int
+_add_crawl_stats_to_dict (xlator_t *this, dict_t *output, int child,
+                          shd_crawl_event_t *shd_event, struct timeval *tv)
+{
+        int       ret          = 0;
+        uint64_t  count        = 0;
+        char      key[256]     = {0};
+        int       xl_id        = 0;
+        char     *end_time_str = NULL;
+        int       progress     = -1;
+
+        if (!shd_event->start_time_str) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_int32 (output, this->name, &xl_id);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+                goto out;
+        }
+
+        /* a missing counter leaves count at 0; the result is overwritten */
+        snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+        ret = dict_get_uint64 (output, key, &count);
+
+        snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
+                  xl_id, child, count);
+        ret = dict_set_uint64(output, key, shd_event->healed_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "healed_count to output");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
+                  xl_id, child, count);
+        ret = dict_set_uint64 (output, key, shd_event->split_brain_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "split_brain_count to output");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
+                  xl_id, child, count);
+        ret = dict_set_dynstr (output, key,
+                               gf_strdup (shd_event->crawl_type));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "crawl_type to output");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
+                  xl_id, child, count);
+        ret = dict_set_uint64 (output, key, shd_event->heal_failed_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "healed_failed_count to output");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
+                  xl_id, child, count);
+        ret = dict_set_dynstr (output, key,
+                               gf_strdup(shd_event->start_time_str));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "crawl_start_time to output");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
+                  xl_id, child, count);
+        /* an event without an end time gets a placeholder string */
+        end_time_str = shd_event->end_time_str;
+        if (!end_time_str)
+                end_time_str = "Could not determine the end time";
+        ret = dict_set_dynstr (output, key, gf_strdup(end_time_str));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "crawl_end_time to output");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
+                  xl_id, child, count);
+        progress = (shd_event->crawl_inprogress == _gf_true) ? 1 : 0;
+        ret = dict_set_int32 (output, key, progress);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "inprogress to output");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "statistics-%d-%d-count",xl_id, child);
+        ret = dict_set_uint64 (output, key, count + 1);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not increment the "
+                        "counter.");
+                goto out;
+        }
+out:
+        return ret;
+}
+
+/*
+ * Append 'path' to the per-child list kept in 'output'.
+ *
+ * The list index is read from "<id>-<child>-count" (0 when absent). The
+ * path is stored under "<id>-<child>-<index>", as a dynamic string when
+ * 'dyn' is set and as a plain string otherwise. When 'tv' is given its
+ * seconds are stored under "<id>-<child>-<index>-time". The counter is
+ * then written back incremented by one.
+ *
+ * Returns 0 on success, or the non-zero result of the failing dict call.
+ */
+int
+_add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path,
+                   struct timeval *tv, gf_boolean_t dyn)
+{
+        char      key[256] = {0};
+        uint64_t  index    = 0;
+        int       xl_id    = 0;
+        int       ret      = -1;
+
+        ret = dict_get_int32 (output, this->name, &xl_id);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+                goto out;
+        }
+
+        /* a missing counter leaves index at 0; the result is overwritten */
+        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+        ret = dict_get_uint64 (output, key, &index);
+
+        snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, index);
+        ret = dyn ? dict_set_dynstr (output, key, path)
+                  : dict_set_str (output, key, path);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+                        path);
+                goto out;
+        }
+
+        if (tv) {
+                snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
+                          child, index);
+                ret = dict_set_uint32 (output, key, tv->tv_sec);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "%s: Could not set time", path);
+                        goto out;
+                }
+        }
+
+        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+        ret = dict_set_uint64 (output, key, index + 1);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
+                goto out;
+        }
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Resolve the path of 'child' by fetching GFID_TO_PATH_KEY from
+ * readdir_xl.
+ *
+ * On success *fpath receives a newly allocated copy of the path and 0 is
+ * returned. On failure *fpath is left untouched and a non-zero value is
+ * returned; if the getxattr failed with ENOENT and 'missing' is non-NULL,
+ * *missing is set to true. The xattr dict is unreferenced on every path.
+ */
+int
+_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
+                         char **fpath, gf_boolean_t *missing)
+{
+        dict_t *xattr  = NULL;
+        char   *stored = NULL;
+        char   *copy   = NULL;
+        int     ret    = -1;
+
+        ret = syncop_getxattr (readdir_xl, child, &xattr, GFID_TO_PATH_KEY);
+        if (ret < 0) {
+                if (missing && (errno == ENOENT))
+                        *missing = _gf_true;
+                goto out;
+        }
+
+        ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &stored);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to get path for "
+                        "gfid %s", uuid_utoa (child->gfid));
+                goto out;
+        }
+
+        /* the dict owns 'stored'; hand the caller its own copy */
+        copy = gf_strdup (stored);
+        if (!copy) {
+                ret = -1;
+                goto out;
+        }
+
+        *fpath = copy;
+        ret = 0;
+out:
+        if (xattr)
+                dict_unref (xattr);
+        return ret;
+}
+
+/*
+ * eh_dump() callback: add the event's path to the dump dictionary when
+ * the event belongs to the child being dumped.
+ *
+ * Events of other children are skipped with a return value of 0;
+ * otherwise the result of _add_path_to_dict is returned.
+ */
+int
+_add_event_to_dict (circular_buffer_t *cb, void *data)
+{
+        shd_dump_t  *dump  = data;
+        shd_event_t *event = cb->data;
+
+        if (event->child != dump->child)
+                return 0;
+
+        return _add_path_to_dict (dump->this, dump->dict, dump->child,
+                                  event->path, &cb->tv, _gf_false);
+}
+
+/*
+ * eh_dump() callback: forward one stored crawl event, together with the
+ * timestamp of its buffer slot, to _add_crawl_stats_to_dict().  `data` is
+ * the shd_dump_t supplied to eh_dump().
+ */
+int
+_add_crawl_event_statistics_to_dict (circular_buffer_t *cb, void *data)
+{
+        shd_dump_t        *dump  = data;
+        shd_crawl_event_t *stats = cb->data;
+
+        return _add_crawl_stats_to_dict (dump->this, dump->dict, dump->child,
+                                         stats, &cb->tv);
+}
+
+/*
+ * Dump every entry of event history `eh` that belongs to `child` into
+ * `dict`.  Always returns 0; the result of eh_dump() is not examined.
+ */
+int
+_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child)
+{
+        shd_dump_t dump = {
+                .this  = this,
+                .dict  = dict,
+                .child = child,
+        };
+
+        eh_dump (eh, &dump, _add_event_to_dict);
+        return 0;
+}
+
+
+/*
+ * Dump the stored crawl statistics of `child` (shd->statistics[child]) into
+ * `dict`.  Always returns 0; the result of eh_dump() is not examined.
+ */
+int
+_add_statistics_to_dict (xlator_t *this, dict_t *dict, int child)
+{
+        afr_private_t *priv = this->private;
+        shd_dump_t     dump = {
+                .this  = this,
+                .dict  = dict,
+                .child = child,
+        };
+
+        eh_dump (priv->shd.statistics[child], &dump,
+                 _add_crawl_event_statistics_to_dict);
+        return 0;
+}
+
+/*
+ * Unlink the index entry `fname` under `parent` on readdir_xl.  A failing
+ * unlink is logged unless errno is ENOENT; nothing is reported back to the
+ * caller.
+ */
+void
+_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
+                     loc_t *parent, char *fname)
+{
+        loc_t stale = {0};
+        int   rc    = 0;
+
+        if (_build_index_loc (this, &stale, fname, parent))
+                return;
+
+        gf_log (this->name, GF_LOG_DEBUG, "Removing stale index "
+                "for %s on %s", stale.name, readdir_xl->name);
+
+        rc = syncop_unlink (readdir_xl, &stale);
+        if (rc && (errno != ENOENT)) {
+                gf_log(this->name, GF_LOG_ERROR, "%s: Failed to remove index "
+                       "on %s - %s",stale.name, readdir_xl->name,
+                       strerror (errno));
+        }
+
+        /*
+         * path is cleared so that loc_wipe() does not act on it.
+         * NOTE(review): presumably _build_index_loc() does not hand over an
+         * owned path string -- confirm against its definition.
+         */
+        stale.path = NULL;
+        loc_wipe (&stale);
+}
+
+/*
+ * Crawl callback for the "to be healed" statistics: look up `childloc` on
+ * the crawl's readdir_xl and add its link count, minus two, to the running
+ * "<xl_id>-<child>-hardlinks" counter kept in the output dictionary
+ * (crawl_data->op_data).  `entry` and `parentloc` are unused.
+ *
+ * Returns 0 on success, otherwise the failing call's return value.
+ */
+int
+_count_hard_links_under_base_indices_dir (xlator_t *this,
+                                          afr_crawl_data_t *crawl_data,
+                                          gf_dirent_t *entry, loc_t *childloc,
+                                          loc_t *parentloc, struct iatt *iattr)
+{
+        xlator_t    *readdir_xl = crawl_data->readdir_xl;
+        dict_t      *output     = crawl_data->op_data;
+        int          child      = crawl_data->child;
+        struct iatt  parent_buf = {0};
+        char         key[256]   = {0};
+        uint64_t     total      = 0;
+        int          xl_id      = 0;
+        int          ret        = 0;
+
+        ret = syncop_lookup (readdir_xl, childloc, NULL, iattr, NULL,
+                             &parent_buf);
+        if (ret)
+                return ret;
+
+        ret = dict_get_int32 (output, this->name, &xl_id);
+        if (ret)
+                return ret;
+
+        snprintf (key, sizeof (key), "%d-%d-hardlinks", xl_id, child);
+
+        /* The lookup result is ignored: with no stored counter yet the sum
+         * starts from the zero initialiser of `total`. */
+        (void) dict_get_uint64 (output, key, &total);
+
+        /* Discount the base entry under indices/base_indices and the entry
+         * under indices/xattrop. */
+        total = total + iattr->ia_nlink - 2;
+
+        return dict_set_uint64 (output, key, total);
+}
+
+/*
+ * Crawl callback for the index summary: resolve the path behind the gfid in
+ * `childloc` and append it to the output dictionary (crawl_data->op_data).
+ * When the path lookup reports the file as missing, the stale index entry
+ * is removed instead.  `entry` and `iattr` are unused.
+ *
+ * Returns -1 for a null gfid, otherwise the result of the path lookup or of
+ * _add_path_to_dict().
+ */
+int
+_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
+                      gf_dirent_t *entry,
+                      loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+{
+        char          gfid_str[64] = {0};
+        char         *path         = NULL;
+        gf_boolean_t  missing      = _gf_false;
+        int           ret          = -1;
+
+        if (uuid_is_null (childloc->gfid))
+                return -1;
+
+        ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl, childloc,
+                                       &path, &missing);
+        if (ret != 0) {
+                if (missing)
+                        _remove_stale_index (this, crawl_data->readdir_xl,
+                                             parentloc,
+                                             uuid_utoa_r (childloc->gfid,
+                                                          gfid_str));
+        } else {
+                ret = _add_path_to_dict (this, crawl_data->op_data,
+                                         crawl_data->child, path, NULL,
+                                         _gf_true);
+        }
+
+        /*
+         * NOTE(review): _add_path_to_dict() can fail after it has already
+         * stored `path` with dict_set_dynstr (e.g. when updating the count
+         * key fails); freeing it here would then leave the dictionary with
+         * a dangling pointer -- confirm and fix in _add_path_to_dict().
+         */
+        if (ret && path)
+                GF_FREE (path);
+        return ret;
+}
+
+/*
+ * Book-keeping after a self-heal lookup on `child`.
+ *
+ * For INDEX crawls a lookup that failed with ENOENT only removes the stale
+ * index entry.  Otherwise the outcome is classified as split-brain,
+ * heal-failed or healed, the matching counter of the running crawl event
+ * for crawl_data->child is bumped, and an event carrying the file's path is
+ * saved in the matching event history.  If none of the three outcomes
+ * applies nothing is recorded.
+ *
+ * this       - afr xlator
+ * parent     - loc of the directory being crawled (used for index removal)
+ * child      - loc of the entry that was looked up
+ * op_ret     - return value of the lookup
+ * op_errno   - errno observed after the lookup
+ * xattr_rsp  - lookup response; "sh-failed" and "actual-sh-done" are read
+ *              from it when it is non-NULL
+ * crawl_data - context of the running crawl
+ *
+ * The path string is owned by this function until an event has been saved
+ * successfully; on every other exit it is released here.
+ */
+void
+_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
+                       int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp,
+                       afr_crawl_data_t *crawl_data)
+{
+        int                  ret             = 0;
+        afr_private_t       *priv            = NULL;
+        afr_self_heald_t    *shd             = NULL;
+        eh_t                *eh              = NULL;
+        char                *path            = NULL;
+        char                 gfid_str[64]    = {0};
+        shd_event_t         *event           = NULL;
+        int32_t              sh_failed       = 0;
+        gf_boolean_t         split_brain     = _gf_false;
+        int32_t              actual_sh_done  = 0;
+        shd_crawl_event_t  **shd_crawl_event = NULL;
+
+        priv = this->private;
+        shd  = &priv->shd;
+        if (crawl_data->crawl == INDEX) {
+                if ((op_ret < 0) && (op_errno == ENOENT)) {
+                        _remove_stale_index (this, crawl_data->readdir_xl,
+                                             parent, uuid_utoa_r (child->gfid,
+                                                                  gfid_str));
+                        goto out;
+                }
+                ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
+                                               child, &path, NULL);
+                if (ret)
+                        goto out;
+        } else {
+                path = gf_strdup (child->path);
+                if (!path)
+                        goto out;
+        }
+
+        if (xattr_rsp) {
+                /* Lookup failures are ignored on purpose; the flags then
+                 * keep their zero initialisers. */
+                (void) dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
+                (void) dict_get_int32 (xattr_rsp, "actual-sh-done",
+                                       &actual_sh_done);
+        }
+
+        shd_crawl_event = (shd_crawl_event_t**)(shd->crawl_events);
+
+        split_brain = afr_is_split_brain (this, child->inode);
+        if ((op_ret < 0 && op_errno == EIO) || split_brain) {
+                eh = shd->split_brain;
+                shd_crawl_event[crawl_data->child]->split_brain_count += 1;
+        } else if ((op_ret < 0) || sh_failed) {
+                eh = shd->heal_failed;
+                shd_crawl_event[crawl_data->child]->heal_failed_count += 1;
+        } else if (actual_sh_done == 1) {
+                eh = shd->healed;
+                shd_crawl_event[crawl_data->child]->healed_count += 1;
+        }
+
+        if (eh == NULL) {
+                /* Nothing to record; path is released at out. */
+                gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ",
+                        path);
+                goto out;
+        }
+
+        event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
+        if (!event)
+                goto out;
+        event->child = crawl_data->child;
+        event->path = path;
+
+        ret = eh_save_history (eh, event);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save "
+                        "to event history, (%d, %s)", path, op_ret,
+                        strerror (op_errno));
+                /* The history did not take the event: drop it here, the
+                 * path it points to is released at out. */
+                GF_FREE (event);
+                goto out;
+        }
+
+        /* The saved event now references path; it must not be freed. */
+        path = NULL;
+out:
+        if (path)
+                GF_FREE (path);
+        return;
+}
+
+/*
+ * Link loc->inode into the inode table using `iattr` and make `loc` point
+ * at the inode returned by inode_link(), dropping the reference held on
+ * the previous one.  Returns 0 on success, -1 when inode_link() fails (loc
+ * is then left unchanged).
+ */
+int
+_link_inode_update_loc (xlator_t *this, loc_t *loc, struct iatt *iattr)
+{
+        inode_t *linked = inode_link (loc->inode, NULL, NULL, iattr);
+
+        if (!linked) {
+                gf_log (this->name, GF_LOG_ERROR, "inode link failed "
+                        "on the inode (%s)", uuid_utoa (iattr->ia_gfid));
+                return -1;
+        }
+
+        inode_unref (loc->inode);
+        loc->inode = linked;
+        return 0;
+}
+
+/*
+ * Crawl callback that triggers self-heal on one entry: performs a lookup on
+ * `child` through this xlator with "allow-sh-for-running-transaction" set
+ * in the request, passes the outcome to _crawl_post_sh_action() and, when
+ * the lookup succeeded, links the inode into `child`.  `entry` is unused.
+ *
+ * Returns -1 (errno = ENOMEM) when the request dictionary cannot be
+ * created, otherwise the lookup result or, after a successful lookup, the
+ * result of _link_inode_update_loc().
+ */
+int
+_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
+                  loc_t *child, loc_t *parent, struct iatt *iattr)
+{
+        struct iatt  parentbuf = {0};
+        dict_t      *req       = NULL;
+        dict_t      *rsp       = NULL;
+        int          ret       = 0;
+
+        req = dict_new ();
+        if (!req) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        /* Failure to set the hint is not treated as an error. */
+        (void) dict_set_int32 (req, "allow-sh-for-running-transaction", 1);
+
+        gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
+
+        ret = syncop_lookup (this, child, req, iattr, &rsp, &parentbuf);
+        _crawl_post_sh_action (this, parent, child, ret, errno, rsp,
+                               crawl_data);
+        if (rsp)
+                dict_unref (rsp);
+
+        if (ret == 0)
+                ret = _link_inode_update_loc (this, child, iattr);
+
+        dict_unref(req);
+        return ret;
+}
+
+/*
+ * Completion callback passed to afr_start_crawl(): releases the crawl
+ * context (`data`) and destroys the call stack of the frame the crawl was
+ * started with.  The crawl's return value is not used.  Always returns 0.
+ */
+static int
+afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        (void) ret;
+
+        GF_FREE (data);
+        STACK_DESTROY (sync_frame->root);
+
+        return 0;
+}
+
+/*
+ * Start an exclusive self-heal crawl of type `crawl` on `child`.  Entries
+ * are processed by _self_heal_entry(), the crawl is flagged to stop when
+ * fewer than two children are up, and afr_crawl_done() cleans up when the
+ * crawl finishes.
+ */
+void
+_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+        const gf_boolean_t exclusive = _gf_true;
+        const int          flags     = STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+        afr_start_crawl (this, child, crawl, _self_heal_entry,
+                         NULL, exclusive, flags,
+                         afr_crawl_done);
+}
+
+/*
+ * Decide whether a crawl on `child` may start or continue.
+ *
+ * The crawl is refused when the self-heal daemon is disabled, when the
+ * child is down, or -- if STOP_CRAWL_ON_SINGLE_SUBVOL is set in
+ * crawl_flags -- when fewer than two children are up.
+ *
+ * Returns _gf_true when the crawl may proceed.  If `reason` is non-NULL it
+ * receives a static string describing the refusal, or NULL when the crawl
+ * may proceed.
+ */
+gf_boolean_t
+_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
+{
+        afr_private_t *priv = this->private;
+        char          *why  = NULL;
+        gf_boolean_t   ok   = _gf_false;
+
+        if (!priv->shd.enabled) {
+                why = "Self-heal daemon is not enabled";
+                gf_log (this->name, GF_LOG_DEBUG, "%s", why);
+        } else if (!priv->child_up[child]) {
+                gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , "
+                        "subvol went down", priv->children[child]->name);
+                why = "Brick is Not connected";
+        } else if ((crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) &&
+                   (afr_up_children_count (priv->child_up,
+                                           priv->child_count) < 2)) {
+                gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl as "
+                        "< 2 children are up");
+                why = "< 2 bricks in replica are running";
+        } else {
+                ok = _gf_true;
+        }
+
+        if (reason)
+                *reason = why;
+        return ok;
+}
+
+/*
+ * Run crawl operation `op` on every child that is local to this node.
+ *
+ * this   - afr xlator
+ * crawl  - crawl type handed to the self-heal crawl (HEAL only)
+ * op     - HEAL starts a self-heal crawl, INFO collects the index summary
+ *          into `output`, STATISTICS_TO_BE_HEALED counts hard links into
+ *          `output`
+ * output - optional result dictionary; when given it must already hold the
+ *          translator id under this->name, and a
+ *          "<xl_id>-<child>-status" string is stored for every child
+ *
+ * For a FULL crawl the loop stops after the first child the operation was
+ * started on.
+ *
+ * Returns 0 when the operation was started on at least one local child,
+ * -1 otherwise.
+ */
+int
+_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+                               shd_crawl_op op, dict_t *output)
+{
+        afr_private_t *priv        = NULL;
+        char          *status      = NULL;
+        char          *subkey      = "status";
+        char           key[256]    = {0};
+        shd_pos_t      pos_data    = {0};
+        gf_boolean_t   stop        = _gf_false;
+        int            op_ret      = -1;
+        int            xl_id       = -1;
+        int            i           = 0;
+        int            ret         = 0;
+        int            crawl_flags = 0;
+
+        priv = this->private;
+        if (op == HEAL)
+                crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+        if (output) {
+                ret = dict_get_int32 (output, this->name, &xl_id);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Invalid input, "
+                                "translator-id is not available");
+                        goto out;
+                }
+        }
+        pos_data.this = this;
+
+        for (i = 0; i < priv->child_count; i++) {
+                /* On refusal `status` carries the reason reported by
+                 * _crawl_proceed(). */
+                if (_crawl_proceed (this, i, crawl_flags, &status)) {
+                        pos_data.child = i;
+                        /*
+                         * We're already in a synctask in this case, so we
+                         * don't need to defer through a second (and in fact
+                         * that can cause deadlock).  Just call straight
+                         * through instead.
+                         */
+                        ret = afr_find_child_position (pos_data.this,
+                                                       pos_data.child,
+                                                       &pos_data.pos);
+                        if (ret) {
+                                status = "Not able to find brick location";
+                        } else if (pos_data.pos == AFR_POS_REMOTE) {
+                                status = "brick is remote";
+                        } else {
+                                op_ret = 0;
+                                if (op == HEAL) {
+                                        status = "Started self-heal";
+                                        _do_self_heal_on_subvol (this, i,
+                                                                 crawl);
+                                } else if (output && (op == INFO)) {
+                                        status = "";
+                                        afr_start_crawl (this, i, INDEX,
+                                                         _add_summary_to_dict,
+                                                         output, _gf_false, 0,
+                                                         NULL);
+                                } else if (output &&
+                                           (op == STATISTICS_TO_BE_HEALED)) {
+                                        status = "";
+                                        afr_start_crawl (this, i,
+                                                         INDEX_TO_BE_HEALED,
+                                                         _count_hard_links_under_base_indices_dir,
+                                                         output, _gf_false,
+                                                         0, NULL);
+                                }
+                        }
+                        /* A FULL crawl is started on one child only. */
+                        stop = (!op_ret && (crawl == FULL));
+                }
+
+                /* Publish the status once per child, before a possible
+                 * early exit from the loop. */
+                if (output) {
+                        snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i,
+                                  subkey);
+                        ret = dict_set_str (output, key, status);
+                        if (ret)
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "%s: Could not set status in output",
+                                        key);
+                }
+
+                if (stop)
+                        break;
+        }
+out:
+        return op_ret;
+}
+
+/*
+ * Start a self-heal crawl of type `crawl` on the local children; a thin
+ * wrapper that selects the HEAL operation of
+ * _do_crawl_op_on_local_subvols() and returns its result.
+ */
+int
+_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+                                dict_t *output)
+{
+        int op_ret = _do_crawl_op_on_local_subvols (this, crawl, HEAL,
+                                                    output);
+
+        return op_ret;
+}
+
+/*
+ * Collect the index summary of the local children into `output`; a thin
+ * wrapper that selects the INFO operation with an INDEX crawl and returns
+ * the result of _do_crawl_op_on_local_subvols().
+ */
+int
+_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+{
+        int op_ret = _do_crawl_op_on_local_subvols (this, INDEX, INFO,
+                                                    output);
+
+        return op_ret;
+}
+
+/*
+ * Add the stored crawl statistics of every child whose position is
+ * AFR_POS_LOCAL to `dict`.
+ */
+void
+afr_fill_completed_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (priv->shd.pos[child] == AFR_POS_LOCAL)
+                        _add_statistics_to_dict (this, dict, child);
+        }
+}
+
+/*
+ * Return a crawl event to its pristine state: counters zeroed, the owned
+ * start-time string freed, and every pointer and the in-progress flag
+ * cleared.  Only start_time_str is freed; the other strings are merely
+ * dropped.
+ */
+static void
+reset_crawl_event (shd_crawl_event_t *crawl_event)
+{
+        GF_FREE (crawl_event->start_time_str);
+        crawl_event->start_time_str = NULL;
+        crawl_event->end_time_str   = NULL;
+        crawl_event->crawl_type     = NULL;
+
+        crawl_event->crawl_inprogress  = _gf_false;
+        crawl_event->heal_failed_count = 0;
+        crawl_event->split_brain_count = 0;
+        crawl_event->healed_count      = 0;
+}
+
+/*
+ * Fill `dst` with a snapshot of the running crawl event `src`: counters and
+ * crawl type are copied, the start time is duplicated with gf_strdup(), the
+ * end time is replaced by a fixed "in progress" text and the in-progress
+ * flag is set.
+ */
+static void
+afr_copy_crawl_event_struct (shd_crawl_event_t *src, shd_crawl_event_t *dst)
+{
+        /* Marks of a crawl that has not finished yet. */
+        dst->crawl_inprogress = _gf_true;
+        dst->end_time_str     = "Crawl is already in progress";
+
+        dst->crawl_type     = src->crawl_type;
+        dst->start_time_str = gf_strdup (src->start_time_str);
+
+        dst->heal_failed_count = src->heal_failed_count;
+        dst->split_brain_count = src->split_brain_count;
+        dst->healed_count      = src->healed_count;
+}
+
+/*
+ * Add the statistics of the crawls that are currently running on local
+ * children to `dict`.  While priv->lock is held, each running crawl event
+ * is copied into a scratch event which is then passed to
+ * _add_crawl_stats_to_dict().
+ *
+ * Returns 0 on success, -1 when the scratch event cannot be allocated.
+ */
+static int
+afr_fill_crawl_statistics_of_running_crawl(xlator_t *this, dict_t *dict)
+{
+        afr_private_t     *priv     = this->private;
+        afr_self_heald_t  *shd      = &priv->shd;
+        shd_crawl_event_t *snapshot = NULL;
+        int                idx      = 0;
+
+        snapshot = GF_CALLOC (1, sizeof (shd_crawl_event_t),
+                              gf_afr_mt_shd_crawl_event_t);
+        if (!snapshot)
+                return -1;
+
+        LOCK (&priv->lock);
+        {
+                for (idx = 0; idx < priv->child_count; idx++) {
+                        if (shd->pos[idx] != AFR_POS_LOCAL)
+                                continue;
+
+                        /* Drops the start-time copy of the previous child. */
+                        reset_crawl_event (snapshot);
+
+                        if (shd->crawl_events[idx]) {
+                                afr_copy_crawl_event_struct (shd->crawl_events[idx],
+                                                             snapshot);
+                                _add_crawl_stats_to_dict (this, dict, idx,
+                                                          snapshot, NULL);
+                        }
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        reset_crawl_event (snapshot);
+        GF_FREE (snapshot);
+
+        return 0;
+}
+
+/*
+ * Add crawl statistics of the local children to `dict`: first those of
+ * completed crawls, then those of crawls still running.  Returns the result
+ * of the second step.
+ */
+static int
+_add_local_subvols_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+        afr_fill_completed_crawl_statistics_to_dict (this, dict);
+
+        return afr_fill_crawl_statistics_of_running_crawl (this, dict);
+}
+/*
+ * Dump the entries of event history `eh` into `dict` once for every child
+ * whose position is AFR_POS_LOCAL.  Always returns 0.
+ */
+int
+_add_local_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+{
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (priv->shd.pos[child] == AFR_POS_LOCAL)
+                        _add_eh_to_dict (this, eh, dict, child);
+        }
+
+        return 0;
+}
+
+/*
+ * Entry point for self-heal daemon operations requested through
+ * dictionaries.
+ *
+ * input  - must carry the operation under "xl-op" and the translator id
+ *          under this->name
+ * output - receives the results of the operation; the translator id is
+ *          stored in it under this->name for the duration of the call and
+ *          removed again before returning
+ *
+ * Returns the result of the selected operation, or the error of the failed
+ * dictionary access.  GF_AFR_OP_INDEX_SUMMARY always reports 0.  An unknown
+ * operation is logged and leaves the return value at 0.
+ */
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+        gf_xl_afr_op_t    op     = GF_AFR_OP_INVALID;
+        int32_t           op_val = GF_AFR_OP_INVALID;
+        int               ret    = 0;
+        afr_private_t    *priv   = NULL;
+        afr_self_heald_t *shd    = NULL;
+        int               xl_id  = 0;
+
+        priv = this->private;
+        shd  = &priv->shd;
+
+        /* Read into a real int32_t: writing through a casted pointer to
+         * the enum object depends on the enum's underlying type. */
+        ret = dict_get_int32 (input, "xl-op", &op_val);
+        if (ret)
+                goto out;
+        op = (gf_xl_afr_op_t) op_val;
+
+        ret = dict_get_int32 (input, this->name, &xl_id);
+        if (ret)
+                goto out;
+        ret = dict_set_int32 (output, this->name, xl_id);
+        if (ret)
+                goto out;
+        switch (op) {
+        case GF_AFR_OP_HEAL_INDEX:
+                ret = _do_self_heal_on_local_subvols (this, INDEX, output);
+                break;
+        case GF_AFR_OP_HEAL_FULL:
+                ret = _do_self_heal_on_local_subvols (this, FULL, output);
+                break;
+        case GF_AFR_OP_INDEX_SUMMARY:
+                (void)_get_index_summary_on_local_subvols (this, output);
+                ret = 0;
+                break;
+        case GF_AFR_OP_HEALED_FILES:
+                ret = _add_local_subvols_eh_to_dict (this, shd->healed, output);
+                break;
+        case GF_AFR_OP_HEAL_FAILED_FILES:
+                ret = _add_local_subvols_eh_to_dict (this, shd->heal_failed,
+                                                     output);
+                break;
+        case GF_AFR_OP_SPLIT_BRAIN_FILES:
+                ret = _add_local_subvols_eh_to_dict (this, shd->split_brain,
+                                                     output);
+                break;
+        case GF_AFR_OP_STATISTICS:
+                ret = _add_local_subvols_crawl_statistics_to_dict (this, output);
+                break;
+        case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
+                                                     STATISTICS_TO_BE_HEALED,
+                                                     output);
+                break;
+        default:
+                /* NOTE(review): ret stays 0 here, so an unknown operation
+                 * is reported as success -- confirm this is intended. */
+                gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
+                break;
+        }
+out:
+        dict_del (output, this->name);
+        return ret;
+}
+
+/*
+ * Timer callback that drives periodic index self-heal for one child; the
+ * child index is carried in `data`.
+ *
+ * If the position of the child is still unknown it is resolved first.  An
+ * index crawl is started when the daemon is enabled and the child is
+ * local.  Finally the timer is re-armed for shd->timeout seconds, unless
+ * the child is remote, and the previously stored timer is cancelled.
+ */
+void
+afr_poll_self_heal (void *data)
+{
+        xlator_t         *this      = THIS;
+        afr_private_t    *priv      = this->private;
+        afr_self_heald_t *shd       = &priv->shd;
+        long              idx       = (long)data;
+        struct timespec   delay     = {0};
+        gf_timer_t       *prev      = NULL;
+        gf_timer_t       *armed     = NULL;
+        shd_pos_t         pos_data  = {0};
+        int               ret       = 0;
+
+        if (shd->pos[idx] == AFR_POS_UNKNOWN) {
+                pos_data.this = this;
+                pos_data.child = idx;
+                /* No completion callback is passed; pos_data lives on this
+                 * stack and is read right after the call returns. */
+                ret = synctask_new (this->ctx->env,
+                                    afr_syncop_find_child_position,
+                                    NULL, NULL, &pos_data);
+                if (!ret)
+                        shd->pos[idx] = pos_data.pos;
+        }
+
+        if (shd->enabled && (shd->pos[idx] == AFR_POS_LOCAL))
+                _do_self_heal_on_subvol (this, idx, INDEX);
+
+        delay.tv_sec = shd->timeout;
+        delay.tv_nsec = 0;
+
+        /* notify and previous timer should be synchronized. */
+        LOCK (&priv->lock);
+        {
+                prev = shd->timer[idx];
+                if (shd->pos[idx] != AFR_POS_REMOTE) {
+                        shd->timer[idx] = gf_timer_call_after (this->ctx,
+                                                               delay,
+                                                               afr_poll_self_heal,
+                                                               data);
+                        armed = shd->timer[idx];
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (prev)
+                gf_timer_call_cancel (this->ctx, prev);
+
+        if (!armed && (shd->pos[idx] != AFR_POS_REMOTE)) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not create self-heal polling timer for %s",
+                        priv->children[idx]->name);
+        }
+}
+
+/*
+ * Completion callback of the position lookup started by
+ * afr_proactive_self_heal().  On success the position found is stored,
+ * polling is (re)started for a child that is not remote, and an index
+ * self-heal is kicked off on the local children.  `data` (the shd_pos_t of
+ * the lookup) is always freed.  Always returns 0.
+ */
+static int
+afr_handle_child_up (int ret, call_frame_t *sync_frame, void *data)
+{
+        shd_pos_t     *lookup = data;
+        afr_private_t *priv   = NULL;
+
+        if (ret == 0) {
+                priv = lookup->this->private;
+                priv->shd.pos[lookup->child] = lookup->pos;
+
+                if (lookup->pos != AFR_POS_REMOTE)
+                        afr_poll_self_heal ((void*)(long)lookup->child);
+
+                _do_self_heal_on_local_subvols (THIS, INDEX, NULL);
+        }
+
+        GF_FREE (data);
+        return 0;
+}
+
+/*
+ * Start self-heal for a child that came up; the child index is carried in
+ * `data`.
+ *
+ * The position of the brick could have changed and it could be local now,
+ * so it is computed again in a synctask; afr_handle_child_up() continues
+ * from there and releases the lookup context.  If the task cannot be
+ * started the context is released here, since the callback will not run.
+ */
+void
+afr_proactive_self_heal (void *data)
+{
+        xlator_t  *this     = NULL;
+        long       child    = (long)data;
+        shd_pos_t *pos_data = NULL;
+        int        ret      = 0;
+
+        this = THIS;
+
+        pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t);
+        if (!pos_data)
+                goto out;
+        pos_data->this = this;
+        pos_data->child = child;
+
+        ret = synctask_new (this->ctx->env, afr_syncop_find_child_position,
+                            afr_handle_child_up, NULL, pos_data);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not start position "
+                        "lookup for child %ld", child);
+                GF_FREE (pos_data);
+        }
+out:
+        return;
+}
+
+/*
+ * Extract the host part of a pathinfo string, i.e. the text between the
+ * first and the last ':' of `pathinfo`.
+ *
+ * pathinfo - string to parse; may be NULL
+ * hostname - destination buffer
+ * size     - size of `hostname` in bytes
+ *
+ * Returns 0 and stores the NUL-terminated host in `hostname` on success.
+ * Returns -1, leaving `hostname` untouched, when an argument is unusable,
+ * when `pathinfo` holds fewer than two ':' characters, or when the host
+ * does not fit into `size` bytes including the terminator.
+ */
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
+{
+        char   *start = NULL;
+        char   *end   = NULL;
+        size_t  len   = 0;
+
+        if (!pathinfo || !hostname || (size == 0))
+                return -1;
+
+        start = strchr (pathinfo, ':');
+        if (!start)
+                return -1;
+        end = strrchr (pathinfo, ':');
+        if (start == end)
+                return -1;
+
+        /* Number of characters strictly between the two colons. */
+        len = (size_t)(end - start) - 1;
+        if (len >= size)
+                return -1;
+
+        memset (hostname, 0, size);
+        memcpy (hostname, start + 1, len);
+        return 0;
+}
+
+/*
+ * Tell whether the host named in `pathinfo` is this machine, by comparing
+ * it with the name reported by gethostname().
+ *
+ * *local is set to _gf_false first and to _gf_true only when both names
+ * are equal.  Returns 0 when the comparison could be made, non-zero when
+ * `pathinfo` cannot be parsed or gethostname() fails.
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+{
+        char      remote_host[1024] = {0};
+        char      my_host[1024]     = {0};
+        xlator_t *this              = THIS;
+        int       ret               = 0;
+
+        *local = _gf_false;
+
+        ret = get_pathinfo_host (pathinfo, remote_host, sizeof (remote_host));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+                        pathinfo);
+                return ret;
+        }
+
+        ret = gethostname (my_host, sizeof (my_host));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+                        "reason: %s", strerror (errno));
+                return ret;
+        }
+
+        if (strcmp (my_host, remote_host) == 0)
+                *local = _gf_true;
+
+        return ret;
+}
+
+/*
+ * Prepare `dirloc`, the directory a crawl starts from.
+ *
+ * FULL               - the root of the volume.
+ * INDEX              - the index directory; its gfid is read from the
+ *                      GF_XATTROP_INDEX_GFID xattr of the root on
+ *                      readdir_xl.
+ * INDEX_TO_BE_HEALED - the base indices holder; its gfid is read from the
+ *                      GF_BASE_INDICES_HOLDER_GFID xattr of the root.
+ *
+ * For the two index variants dirloc->path is set to a string literal, so
+ * the caller must clear it before calling loc_wipe() on dirloc.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int
+afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
+                           loc_t *dirloc)
+{
+        afr_private_t *priv                      = NULL;
+        dict_t        *xattr                     = NULL;
+        void          *index_gfid                = NULL;
+        void          *base_indices_holder_vgfid = NULL;
+        loc_t          rootloc                   = {0};
+        struct iatt    iattr                     = {0};
+        struct iatt    parent                    = {0};
+        int            ret                       = 0;
+        xlator_t      *readdir_xl                = crawl_data->readdir_xl;
+
+        priv = this->private;
+        if (crawl_data->crawl == FULL) {
+                afr_build_root_loc (this, dirloc);
+        } else if (crawl_data->crawl == INDEX) {
+                afr_build_root_loc (this, &rootloc);
+                ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+                                       GF_XATTROP_INDEX_GFID);
+                if (ret < 0)
+                        goto out;
+                ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to get index "
+                                "dir gfid on %s", readdir_xl->name);
+                        goto out;
+                }
+                if (!index_gfid) {
+                        gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+                                "on %s", readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                uuid_copy (dirloc->gfid, index_gfid);
+                dirloc->path = "";
+                dirloc->inode = inode_new (priv->root_inode->table);
+                if (!dirloc->inode) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to create "
+                                "inode for index dir on %s",
+                                readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                ret = syncop_lookup (readdir_xl, dirloc, NULL,
+                                     &iattr, NULL, &parent);
+                if (ret < 0) {
+                        if (errno != ENOENT) {
+                                gf_log (this->name, GF_LOG_ERROR, "lookup "
+                                        "failed on index dir on %s - (%s)",
+                                        readdir_xl->name, strerror (errno));
+                        }
+                        goto out;
+                }
+                ret = _link_inode_update_loc (this, dirloc, &iattr);
+                if (ret)
+                        goto out;
+        } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+                afr_build_root_loc (this, &rootloc);
+                ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+                                       GF_BASE_INDICES_HOLDER_GFID);
+                if (ret < 0)
+                        goto out;
+                ret = dict_get_ptr (xattr, GF_BASE_INDICES_HOLDER_GFID,
+                                    &base_indices_holder_vgfid);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+                                "on %s", readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                if (!base_indices_holder_vgfid) {
+                        gf_log (this->name, GF_LOG_ERROR, "Base indices holder "
+                                "virtual gfid is null on %s", readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                uuid_copy (dirloc->gfid, base_indices_holder_vgfid);
+                dirloc->path = "";
+                dirloc->inode = inode_new (priv->root_inode->table);
+                if (!dirloc->inode) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to create "
+                                "inode for base_indices_holder dir on %s",
+                                readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                ret = syncop_lookup (readdir_xl, dirloc, NULL, &iattr, NULL,
+                                     &parent);
+                if (ret < 0) {
+                        if (errno != ENOENT) {
+                                gf_log (this->name, GF_LOG_ERROR, "lookup "
+                                        "failed for base_indices_holder dir"
+                                        " on %s - (%s)", readdir_xl->name,
+                                        strerror (errno));
+
+                        } else {
+                                gf_log (this->name, GF_LOG_ERROR, "base_indices"
+                                        "_holder is not yet created.");
+                        }
+                        goto out;
+                }
+                ret = _link_inode_update_loc (this, dirloc, &iattr);
+                if (ret)
+                        goto out;
+        }
+        ret = 0;
+out:
+        if (xattr)
+                dict_unref (xattr);
+        /* rootloc is only filled in for the index variants; wiping the
+         * zero-initialised loc of a FULL crawl is done unconditionally. */
+        loc_wipe (&rootloc);
+        return ret;
+}
+
+/*
+ * Obtain the directory fd used to read `dirloc` during a crawl.
+ *
+ * For a FULL crawl a new fd is created with the crawl's pid and opened
+ * with syncop_opendir() on the crawl's readdir_xl; every other crawl type
+ * uses an anonymous fd on dirloc->inode.
+ *
+ * Returns 0 and stores the fd in *dirfd on success; the caller drops it
+ * with fd_unref().  On failure a non-zero value is returned, *dirfd is not
+ * written and no fd reference is left behind.
+ */
+int
+afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
+                   loc_t *dirloc)
+{
+        fd_t *fd  = NULL;
+        int   ret = 0;
+
+        if (crawl_data->crawl == FULL) {
+                fd = fd_create (dirloc->inode, crawl_data->pid);
+                if (!fd) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to create fd for %s", dirloc->path);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "opendir failed on %s", dirloc->path);
+                        goto out;
+                }
+        } else {
+                fd = fd_anonymous (dirloc->inode);
+                if (!fd) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to get anonymous fd for %s",
+                                uuid_utoa (dirloc->gfid));
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* Success: the reference moves to the caller. */
+        *dirfd = fd;
+        fd = NULL;
+        ret = 0;
+out:
+        /* Only reached with a live fd when opendir failed. */
+        if (fd)
+                fd_unref (fd);
+        return ret;
+}
+
+/*
+ * Pick the xlator directory listings are read from: this xlator for a FULL
+ * crawl, otherwise the child named by crawl_data->child.
+ */
+xlator_t*
+afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data)
+{
+        afr_private_t *priv = this->private;
+
+        return (crawl_data->crawl == FULL) ?
+                this : priv->children[crawl_data->child];
+}
+
+/*
+ * Fill `child` with the loc of directory entry `entry` found under
+ * `parent`, in the way the crawl type requires:
+ *
+ * FULL               - regular child loc built by afr_build_child_loc().
+ * INDEX_TO_BE_HEALED - index loc built by _build_index_loc(), given a new
+ *                      inode, with its path cleared.
+ * any other type     - new inode, gfid parsed from the entry name, path
+ *                      assigned by _loc_assign_gfid_path().
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
+                           gf_dirent_t *entry, afr_crawl_data_t *crawl_data)
+{
+        afr_private_t *priv = this->private;
+        int            ret  = -1;
+
+        switch (crawl_data->crawl) {
+        case FULL:
+                return afr_build_child_loc (this, child, parent,
+                                            entry->d_name);
+
+        case INDEX_TO_BE_HEALED:
+                ret = _build_index_loc (this, child, entry->d_name, parent);
+                if (ret)
+                        return ret;
+                child->inode = inode_new (priv->root_inode->table);
+                if (!child->inode)
+                        return -1;
+                child->path = NULL;
+                return ret;
+
+        default:
+                child->inode = inode_new (priv->root_inode->table);
+                if (!child->inode)
+                        return -1;
+                uuid_parse (entry->d_name, child->gfid);
+                return _loc_assign_gfid_path (child);
+        }
+}
+
+/*
+ * Run the crawl's process_entry callback on every entry of one readdir
+ * batch and, for FULL crawls, descend into the directories found.
+ *
+ * this       - afr xlator
+ * parentloc  - loc of the directory the batch was read from
+ * entries    - list head of the batch
+ * offset     - updated to the d_off of each entry visited
+ * crawl_data - context of the running crawl
+ *
+ * Returns 0 when the batch was walked to its end.  Returns non-zero when
+ * _crawl_proceed() refuses to continue, when a child loc cannot be built,
+ * or -- for INDEX_TO_BE_HEALED crawls only -- when the callback fails.
+ */
+static int
+_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
+ off_t *offset, afr_crawl_data_t *crawl_data)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ loc_t entry_loc = {0};
+ fd_t *fd = NULL;
+ struct iatt iattr = {0};
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ /* Re-checked for every entry so a crawl stops promptly. */
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
+ ret = -1;
+ goto out;
+ }
+ /* Recorded before any skip, so skipped entries advance it too. */
+ *offset = entry->d_off;
+ if (IS_ENTRY_CWD (entry->d_name) ||
+ IS_ENTRY_PARENT (entry->d_name))
+ continue;
+ if ((crawl_data->crawl == FULL) &&
+ uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
+ "gfid present skipping",
+ parentloc->path, entry->d_name);
+ continue;
+ }
+
+ /* entry_loc is reused: release what the previous entry left. */
+ loc_wipe (&entry_loc);
+ ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
+ entry, crawl_data);
+ if (ret)
+ goto out;
+
+ ret = crawl_data->process_entry (this, crawl_data, entry,
+ &entry_loc, parentloc, &iattr);
+
+ /* A callback failure aborts only INDEX_TO_BE_HEALED crawls; the
+ * other crawl types move on to the next entry. */
+ if (crawl_data->crawl == INDEX_TO_BE_HEALED && ret) {
+ goto out;
+ } else if (ret) {
+ continue;
+ }
+
+ /* Index crawls do not recurse. */
+ if ((crawl_data->crawl == INDEX) ||
+ (crawl_data->crawl == INDEX_TO_BE_HEALED))
+ continue;
+
+ if (!IA_ISDIR (iattr.ia_type))
+ continue;
+ fd = NULL;
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);
+ if (ret)
+ continue;
+ /* The result of the nested crawl is not acted upon: ret is
+ * overwritten by the next iteration or by the final ret = 0. */
+ ret = _crawl_directory (fd, &entry_loc, crawl_data);
+ if (fd)
+ fd_unref (fd);
+ }
+ ret = 0;
+out:
+ /* NOTE(review): this message is logged for every non-zero ret of an
+ * INDEX_TO_BE_HEALED crawl, including the _crawl_proceed() and
+ * loc-building exits above. */
+ if ((crawl_data->crawl == INDEX_TO_BE_HEALED) && ret) {
+ gf_log (this->name, GF_LOG_ERROR,"Failed to get the hardlink "
+ "count");
+ }
+ loc_wipe (&entry_loc);
+ return ret;
+}
+
+/*
+ * Read the directory behind `fd` batch by batch and pass every batch to
+ * _process_entries().
+ *
+ * fd         - directory fd to read from
+ * loc        - loc of that directory; loc->inode must be set
+ * crawl_data - context of the running crawl; FULL crawls read with
+ *              syncop_readdirp(), all others with syncop_readdir()
+ *
+ * Returns -1 when _crawl_proceed() refuses to continue, the error of
+ * _process_entries() for INDEX_TO_BE_HEALED crawls, and 0 otherwise.
+ */
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
+{
+ xlator_t *this = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ int ret = 0;
+ gf_boolean_t free_entries = _gf_false;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+
+ /* Only the list head of `entries` is initialised and used here. */
+ INIT_LIST_HEAD (&entries.list);
+ this = THIS;
+
+ GF_ASSERT (loc->inode);
+
+ if (crawl_data->crawl == FULL)
+ gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "crawling INDEX %s",
+ uuid_utoa (loc->gfid));
+
+ while (1) {
+ if (crawl_data->crawl == FULL)
+ ret = syncop_readdirp (readdir_xl, fd, 131072, offset,
+ NULL, &entries);
+ else
+ ret = syncop_readdir (readdir_xl, fd, 131072, offset,
+ &entries);
+ /* Both an empty result and a read error end the loop; the code
+ * after the loop then reports 0 in either case. */
+ if (ret <= 0)
+ break;
+ ret = 0;
+ /* From here on the batch must be released on every exit. */
+ free_entries = _gf_true;
+
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
+ ret = -1;
+ goto out;
+ }
+ if (list_empty (&entries.list))
+ goto out;
+
+ /* _process_entries() advances `offset`, which positions the next
+ * read. */
+ ret = _process_entries (this, loc, &entries, &offset,
+ crawl_data);
+ if ((ret < 0) && (crawl_data->crawl == INDEX_TO_BE_HEALED)) {
+ goto out;
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+ return ret;
+}
+
+/*
+ * Map a child position to the text used in log messages.  Returns NULL for
+ * a value that is none of the three known positions.
+ */
+static char*
+position_str_get (afr_child_pos_t pos)
+{
+        char *name = NULL;
+
+        if (pos == AFR_POS_UNKNOWN)
+                name = "unknown";
+        else if (pos == AFR_POS_LOCAL)
+                name = "local";
+        else if (pos == AFR_POS_REMOTE)
+                name = "remote";
+
+        return name;
+}
+
+/*
+ * Find out whether `child` is a brick of this node.
+ *
+ * The GF_XATTR_NODE_UUID_KEY xattr of the root is fetched from the child
+ * and compared with shd->node_uuid: equal means AFR_POS_LOCAL, different
+ * means AFR_POS_REMOTE.
+ *
+ * Returns 0 and stores the position in *pos on success.  On failure a
+ * non-zero value is returned and *pos is set to AFR_POS_UNKNOWN.
+ */
+int
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos)
+{
+        afr_private_t    *priv      = NULL;
+        afr_self_heald_t *shd       = NULL;
+        dict_t           *xattr_rsp = NULL;
+        loc_t             loc       = {0};
+        int               ret       = 0;
+        char             *node_uuid = NULL;
+
+        priv = this->private;
+        shd  = &priv->shd;
+
+        afr_build_root_loc (this, &loc);
+
+        ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
+                               GF_XATTR_NODE_UUID_KEY);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s - "
+                        "(%s)", priv->children[child]->name, strerror (errno));
+                goto out;
+        }
+
+        ret = dict_get_str (xattr_rsp, GF_XATTR_NODE_UUID_KEY, &node_uuid);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "node-uuid key not found on "
+                        "child %s", priv->children[child]->name);
+                goto out;
+        }
+
+        if (!strcmp (node_uuid, shd->node_uuid))
+                *pos = AFR_POS_LOCAL;
+        else
+                *pos = AFR_POS_REMOTE;
+
+        gf_log (this->name, GF_LOG_DEBUG, "child %s is %s",
+                priv->children[child]->name, position_str_get (*pos));
+out:
+        if (ret)
+                *pos = AFR_POS_UNKNOWN;
+        /* node_uuid points into xattr_rsp, so the response is dropped only
+         * after the comparison above. */
+        if (xattr_rsp)
+                dict_unref (xattr_rsp);
+        loc_wipe (&loc);
+        return ret;
+}
+
+/*
+ * synctask entry point around afr_find_child_position(): `data` is a
+ * shd_pos_t naming the xlator and child; the position found is written
+ * back into its `pos` member.  Returns the result of the lookup.
+ */
+int
+afr_syncop_find_child_position (void *data)
+{
+        shd_pos_t *lookup = data;
+
+        return afr_find_child_position (lookup->this, lookup->child,
+                                        &lookup->pos);
+}
+
+/*
+ * synctask body of a crawl: `data` is the afr_crawl_data_t prepared by
+ * afr_start_crawl().  Picks the xlator to read from, builds the starting
+ * directory, opens it and crawls it.
+ *
+ * Returns 0 when the crawl ran to completion, non-zero when it could not
+ * be started or failed.
+ */
+static int
+afr_dir_crawl (void *data)
+{
+        afr_crawl_data_t *crawl_data = data;
+        xlator_t         *this       = THIS;
+        xlator_t         *source     = NULL;
+        fd_t             *dirfd      = NULL;
+        loc_t             start      = {0};
+        int               ret        = -1;
+
+        if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags,
+                             NULL))
+                goto cleanup;
+
+        source = afr_crawl_readdir_xl_get (this, crawl_data);
+        if (!source)
+                goto cleanup;
+        crawl_data->readdir_xl = source;
+
+        ret = afr_crawl_build_start_loc (this, crawl_data, &start);
+        if (ret)
+                goto cleanup;
+
+        ret = afr_crawl_opendir (this, crawl_data, &dirfd, &start);
+        if (ret) {
+                if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to open base_"
+                                "indices_holder");
+                }
+                goto cleanup;
+        }
+
+        ret = _crawl_directory (dirfd, &start, crawl_data);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
+                        source->name);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG, "Crawl completed "
+                        "on %s", source->name);
+        }
+
+cleanup:
+        if (dirfd)
+                fd_unref (dirfd);
+
+        /* The index start locs carry a string-literal path (set in
+         * afr_crawl_build_start_loc); hide it from loc_wipe(). */
+        if ((crawl_data->crawl == INDEX) ||
+            (crawl_data->crawl == INDEX_TO_BE_HEALED ))
+                start.path = NULL;
+        loc_wipe (&start);
+        return ret;
+}
+
+/*
+ * Name of a crawl type for display: "INDEX" or "FULL".  Returns NULL for
+ * every other crawl type.  The strings are literals and must not be freed.
+ */
+char *
+get_crawl_type_in_string (afr_crawl_type_t crawl)
+{
+        switch (crawl) {
+        case INDEX:
+                return "INDEX";
+        case FULL:
+                return "FULL";
+        default:
+                return NULL;
+        }
+}
+
+/*
+ * Create the statistics record of a crawl that is about to start on
+ * `child` and publish it in shd->crawl_events[child].
+ *
+ * The record carries the current time as text and the name of the crawl
+ * type; its counters start at zero.
+ *
+ * Returns 0 on success.  Returns -1 -- with nothing published and nothing
+ * left allocated -- when memory or the current time cannot be obtained, or
+ * when `crawl` has no name in get_crawl_type_in_string().
+ */
+static int
+afr_allocate_crawl_event (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+        afr_private_t     *priv        = NULL;
+        afr_self_heald_t  *shd         = NULL;
+        int                ret         = -1;
+        shd_crawl_event_t *crawl_event = NULL;
+        time_t             get_time    = 0;
+        char              *time_str    = NULL;
+
+        priv = this->private;
+        shd  = &priv->shd;
+
+        crawl_event = GF_CALLOC (1, sizeof (*crawl_event),
+                                 gf_afr_mt_shd_crawl_event_t);
+        if (!crawl_event)
+                goto out;
+
+        get_time = time(NULL);
+        if (get_time == ((time_t)-1))
+                goto out;
+
+        time_str = ctime (&get_time);
+        if (!time_str)
+                goto out;
+
+        crawl_event->start_time_str = gf_strdup (time_str);
+        if (!crawl_event->start_time_str)
+                goto out;
+
+        crawl_event->crawl_type = get_crawl_type_in_string (crawl);
+        if (!crawl_event->crawl_type)
+                goto out;
+
+        LOCK (&priv->lock);
+        {
+                shd->crawl_events[child] = crawl_event;
+        }
+        UNLOCK (&priv->lock);
+
+        /* Published: the record no longer belongs to this function. */
+        crawl_event = NULL;
+        ret = 0;
+out:
+        if (crawl_event) {
+                GF_FREE (crawl_event->start_time_str);
+                GF_FREE (crawl_event);
+        }
+        return ret;
+
+}
+
+/*
+ * Close the statistics record of the crawl that just ended on `child`:
+ * stamp it with the current time as text, move it into the statistics
+ * history of the child and clear the running-crawl slot.
+ *
+ * Returns -1 when the current time cannot be obtained (the record is then
+ * left in place), otherwise the result of eh_save_history().
+ *
+ * NOTE(review): the slot is cleared even when eh_save_history() fails;
+ * whether the record is released in that case cannot be told from here.
+ */
+static int
+afr_put_crawl_event_in_eh (xlator_t *this, int child)
+{
+        afr_private_t      *priv     = this->private;
+        afr_self_heald_t   *shd      = &priv->shd;
+        shd_crawl_event_t **events   = NULL;
+        shd_crawl_event_t  *finished = NULL;
+        time_t              now      = 0;
+        int                 ret      = 0;
+
+        now = time(NULL);
+        if (now == ((time_t)-1))
+                return -1;
+
+        events = (shd_crawl_event_t**)shd->crawl_events;
+
+        LOCK (&priv->lock);
+        {
+                finished = events[child];
+                finished->end_time_str = gf_strdup (ctime(&now));
+                ret = eh_save_history (shd->statistics[child], finished);
+                events[child] = NULL;
+        }
+        UNLOCK (&priv->lock);
+
+        return ret;
+}
+
+/*
+ * synctask body of an exclusive crawl: at most one such crawl runs per
+ * child at a time.  `data` is the afr_crawl_data_t prepared by
+ * afr_start_crawl().
+ *
+ * If a crawl is already in progress on the child, the requested crawl type
+ * is remembered in shd->pending[child] (a pending FULL crawl is never
+ * replaced) and the function returns.  Otherwise the crawl is run, and run
+ * again for as long as a pending request has been left behind meanwhile.
+ *
+ * Returns 0 when another crawl is in progress, otherwise the result of the
+ * last book-keeping step.  When a book-keeping step fails the in-progress
+ * mark is cleared before returning, so later crawls are not locked out.
+ */
+static int
+afr_dir_exclusive_crawl (void *data)
+{
+        afr_private_t    *priv       = NULL;
+        afr_self_heald_t *shd        = NULL;
+        gf_boolean_t      crawl      = _gf_false;
+        int               ret        = 0;
+        int               child      = -1;
+        xlator_t         *this       = NULL;
+        afr_crawl_data_t *crawl_data = data;
+
+        this = THIS;
+        priv = this->private;
+        shd  = &priv->shd;
+        child = crawl_data->child;
+
+        LOCK (&priv->lock);
+        {
+                if (shd->inprogress[child]) {
+                        if (shd->pending[child] != FULL)
+                                shd->pending[child] = crawl_data->crawl;
+                } else {
+                        shd->inprogress[child] = _gf_true;
+                        crawl = _gf_true;
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (!crawl) {
+                gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
+                        "for %s", priv->children[child]->name);
+                goto out;
+        }
+
+        do {
+                ret = afr_allocate_crawl_event (this, child, crawl_data->crawl);
+                if (ret)
+                        break;
+                afr_dir_crawl (data);
+
+                ret = afr_put_crawl_event_in_eh (this, child);
+                if (ret < 0)
+                        break;
+
+                LOCK (&priv->lock);
+                {
+                        if (shd->pending[child] != NONE) {
+                                crawl_data->crawl = shd->pending[child];
+                                shd->pending[child] = NONE;
+                        } else {
+                                shd->inprogress[child] = _gf_false;
+                                crawl = _gf_false;
+                        }
+                }
+                UNLOCK (&priv->lock);
+        } while (crawl);
+
+        if (crawl) {
+                /* The loop was left through an error: this task still holds
+                 * the in-progress mark and has to give it back. */
+                LOCK (&priv->lock);
+                {
+                        shd->inprogress[child] = _gf_false;
+                }
+                UNLOCK (&priv->lock);
+        }
+out:
+        return ret;
+}
+
+/*
+ * Start a crawl of type `crawl` on child `idx` in a synctask.
+ *
+ * this          - afr xlator
+ * idx           - index of the child to crawl
+ * crawl         - crawl type
+ * process_entry - callback invoked for every directory entry
+ * op_data       - opaque pointer made available to the callback through
+ *                 crawl_data->op_data
+ * exclusive     - run through afr_dir_exclusive_crawl(), which allows one
+ *                 crawl per child at a time, instead of afr_dir_crawl()
+ * crawl_flags   - flags evaluated by _crawl_proceed()
+ * crawl_done    - completion callback; when it is NULL the call returns
+ *                 only after the crawl has finished
+ *
+ * Ownership: once a task with a completion callback has been queued, the
+ * callback releases the crawl context and the frame.  In every other case
+ * (no callback, or the task could not be created) they are released here.
+ */
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+                 process_entry_cbk_t process_entry, void *op_data,
+                 gf_boolean_t exclusive, int crawl_flags,
+                 afr_crawl_done_cbk_t crawl_done)
+{
+        afr_private_t    *priv              = NULL;
+        call_frame_t     *frame             = NULL;
+        afr_crawl_data_t *crawl_data        = NULL;
+        int               ret               = 0;
+        int             (*crawler) (void*)  = NULL;
+
+        priv = this->private;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                goto out;
+
+        afr_set_lk_owner (frame, this, frame->root);
+        afr_set_low_priority (frame);
+        crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
+                                gf_afr_mt_crawl_data_t);
+        if (!crawl_data)
+                goto out;
+        crawl_data->process_entry = process_entry;
+        crawl_data->child = idx;
+        crawl_data->pid = frame->root->pid;
+        crawl_data->crawl = crawl;
+        crawl_data->op_data = op_data;
+        crawl_data->crawl_flags = crawl_flags;
+        gf_log (this->name, GF_LOG_DEBUG, "starting crawl %d for %s",
+                crawl_data->crawl, priv->children[idx]->name);
+
+        if (exclusive)
+                crawler = afr_dir_exclusive_crawl;
+        else
+                crawler = afr_dir_crawl;
+        ret = synctask_new (this->ctx->env, crawler,
+                            crawl_done, frame, crawl_data);
+        if (ret)
+                gf_log (this->name, GF_LOG_ERROR, "afr crawl failed for child"
+                        " %d with ret %d", idx, ret);
+
+        if (crawl_done && !ret) {
+                /* Queued with a completion callback: it cleans up. */
+                crawl_data = NULL;
+                frame = NULL;
+        }
+out:
+        if (crawl_data)
+                GF_FREE (crawl_data);
+        if (frame)
+                STACK_DESTROY (frame->root);
+        return;
+}
+
+/*
+ * Fill `loc` so that it names the root of the volume: a new reference on
+ * priv->root_inode, that inode's gfid, an empty name and a freshly
+ * duplicated "/" as path.  The result of gf_strdup() is not checked.
+ */
+void
+afr_build_root_loc (xlator_t *this, loc_t *loc)
+{
+        afr_private_t *priv = this->private;
+
+        loc->inode = inode_ref (priv->root_inode);
+        uuid_copy (loc->gfid, loc->inode->gfid);
+
+        loc->name = "";
+        loc->path = gf_strdup ("/");
+}
+
+/*
+ * Store the root gfid -- all bytes zero except the last, which is 1 -- in
+ * `dict` through afr_set_dict_gfid() and return that call's result.
+ */
+int
+afr_set_root_gfid (dict_t *dict)
+{
+        uuid_t root_gfid = {0};
+
+        root_gfid[15] = 1;
+
+        return afr_set_dict_gfid (dict, root_gfid);
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
new file mode 100644
index 000000000..e0c083754
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __AFR_SELF_HEALD_H__
+#define __AFR_SELF_HEALD_H__
+#include "xlator.h"
+
+#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
+#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
+#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
+#define AFR_ALL_CHILDREN -1
+
+typedef struct afr_crawl_data_ {
+ int child; /* child index; afr_start_crawl() stores its idx argument here */
+ pid_t pid; /* afr_start_crawl() copies frame->root->pid of the crawl frame */
+ afr_crawl_type_t crawl; /* crawl type requested by the caller */
+ xlator_t *readdir_xl; /* NOTE(review): not set by afr_start_crawl(); presumably the xlator readdir is wound to -- confirm */
+ void *op_data; /* opaque caller data, stored as passed to afr_start_crawl() */
+ int crawl_flags; /* flags passed through from afr_start_crawl() */
+ int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr); /* per-entry callback; same signature as process_entry_cbk_t */
+} afr_crawl_data_t;
+
+typedef struct crawl_event_stats_ {
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
+ char *start_time_str;
+ char *end_time_str;
+ char *crawl_type;
+ gf_boolean_t crawl_inprogress;
+} shd_crawl_event_t;
+
+void _destroy_crawl_event_data (void *data);
+void _destroy_shd_event_data (void *data);
+
+typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
+
+void afr_build_root_loc (xlator_t *this, loc_t *loc);
+
+int afr_set_root_gfid (dict_t *dict);
+
+void
+afr_proactive_self_heal (void *data);
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+
+/*
+ * In addition to its self-heal use, this is used to find a local default
+ * read_child.
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local);
+#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 1fb0781d8..20306e469 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1,25 +1,17 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
#include "byte-order.h"
#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
@@ -32,48 +24,75 @@
of RENAME */
#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
-
afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
{
uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- ret = fd_ctx_get (fd, this, &ctx);
+ priv = this->private;
- if (ret < 0)
- goto out;
+ ret = __fd_ctx_get (fd, this, &ctx);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ if (ret < 0 && fd_is_anonymous (fd)) {
+ ret = __afr_fd_ctx_set (this, fd);
+ if (ret < 0)
+ goto out;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ for (i = 0; i < priv->child_count; i++)
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ }
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
out:
return fd_ctx;
}
+/*
+ * Locked wrapper around __afr_fd_ctx_get(): takes fd->lock, looks up the
+ * afr fd context and returns it (NULL when the lookup yields nothing).
+ */
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+        afr_fd_ctx_t *found = NULL;
+
+        LOCK(&fd->lock);
+        found = __afr_fd_ctx_get (fd, this);
+        UNLOCK(&fd->lock);
+
+        return found;
+}
+
+
static void
-afr_pid_save (call_frame_t *frame)
+afr_save_lk_owner (call_frame_t *frame)
{
afr_local_t * local = NULL;
local = frame->local;
- local->saved_pid = frame->root->pid;
+ local->saved_lk_owner = frame->root->lk_owner;
}
static void
-afr_pid_restore (call_frame_t *frame)
+afr_restore_lk_owner (call_frame_t *frame)
{
afr_local_t * local = NULL;
local = frame->local;
- frame->root->pid = local->saved_pid;
+ frame->root->lk_owner = local->saved_lk_owner;
}
-
static void
__mark_all_pending (int32_t *pending[], int child_count,
afr_transaction_type type)
@@ -126,51 +145,23 @@ out:
return;
}
-
static void
-__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
-{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- local = frame->local;
-
- if (!local->fd)
- return;
-
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- if (!fd_ctx)
- goto out;
-
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]--;
- }
- UNLOCK (&local->fd->lock);
-out:
- return;
-}
-
-
-static void
-__mark_down_children (int32_t *pending[], int child_count,
- unsigned char *child_up, afr_transaction_type type)
+__mark_non_participant_children (int32_t *pending[], int child_count,
+ unsigned char *participants,
+ afr_transaction_type type)
{
int i = 0;
int j = 0;
+ j = afr_index_for_transaction_type (type);
for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
-
- if (!child_up[i])
+ if (!participants[i])
pending[i][j] = 0;
}
}
-static void
+void
__mark_all_success (int32_t *pending[], int child_count,
afr_transaction_type type)
{
@@ -183,6 +174,54 @@ __mark_all_success (int32_t *pending[], int child_count,
}
}
+/*
+ * Replace every zero entry of @child_errno (an array of @child_count
+ * ints) with ENOTCONN; non-zero entries are left untouched.
+ */
+void
+_set_all_child_errno (int *child_errno, unsigned int child_count)
+{
+        /* unsigned to match child_count: avoids a signed/unsigned compare */
+        unsigned int i = 0;
+
+        for (i = 0; i < child_count; i++) {
+                if (child_errno[i] == 0)
+                        child_errno[i] = ENOTCONN;
+        }
+}
+
+/*
+ * Execute the transaction's fop.
+ *
+ * Before calling local->transaction.fop the pending matrix is marked
+ * all-success, unset child errnos are defaulted through
+ * _set_all_child_errno(), the frame's lk-owner is saved and replaced by
+ * the main frame's lk-owner, and any delayed changelog on the fd is
+ * woken up.
+ */
+void
+afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        fd_t          *fd    = local->fd;
+
+        __mark_all_success (local->pending, priv->child_count,
+                            local->transaction.type);
+
+        _set_all_child_errno (local->child_errno, priv->child_count);
+
+        /* The fop must run with the lk-owner of the top xlator.
+         * Eg: posix-lk and flush need the same lk-owner, otherwise
+         * flush cannot clear the posix-lks.
+         */
+        afr_save_lk_owner (frame);
+        frame->root->lk_owner =
+                local->transaction.main_frame->root->lk_owner;
+
+        /* Wake up regardless of the fop type.  A write has already
+           inherited the lock and changelog; for anything else the
+           assumption behind the optimization (successive writes)
+           no longer holds.
+        */
+        if (fd != NULL)
+                afr_delayed_changelog_wake_up (this, fd);
+
+        local->transaction.fop (frame, this);
+}
+
static int
__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
@@ -215,39 +254,7 @@ __changelog_enabled (afr_private_t *priv, afr_transaction_type type)
static int
-__changelog_needed_pre_op (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int op_ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- if (__changelog_enabled (priv, local->transaction.type)) {
- switch (local->op) {
-
- case GF_FOP_WRITE:
- case GF_FOP_FTRUNCATE:
- op_ret = 1;
- break;
-
- case GF_FOP_FLUSH:
- op_ret = 0;
- break;
-
- default:
- op_ret = 1;
- }
- }
-
- return op_ret;
-}
-
-
-static int
-__changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
+__fop_changelog_needed (call_frame_t *frame, xlator_t *this)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
@@ -278,64 +285,42 @@ __changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
return op_ret;
}
-
-static int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending)
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
+ int child, afr_xattrop_type_t op)
{
int i = 0;
int ret = 0;
+ if (op == LOCAL_FIRST) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[child],
+ pending[child],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret)
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
+ if (i == child)
+ continue;
ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i], 3 * sizeof (int32_t));
+ pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
/* 3 = data+metadata+entry */
if (ret < 0)
goto out;
}
-
-out:
- return ret;
-}
-
-
-static int
-afr_set_piggyback_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
- afr_transaction_type type)
-{
- int i = 0;
- int ret = 0;
- int *arr = NULL;
- int index = 0;
- size_t pending_xattr_size = 3 * sizeof (int32_t);
- /* 3 = data+metadata+entry */
-
- index = afr_index_for_transaction_type (type);
-
- for (i = 0; i < priv->child_count; i++) {
- arr = GF_CALLOC (1, pending_xattr_size,
- gf_afr_mt_char);
- if (!arr) {
- ret = -1;
- goto out;
- }
-
- memcpy (arr, pending[i], pending_xattr_size);
-
- arr[index]++;
-
- ret = dict_set_bin (xattr, priv->pending_key[i],
- arr, pending_xattr_size);
-
- if (ret < 0)
+ if (op == LOCAL_LAST) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[child],
+ pending[child],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret)
goto out;
}
-
out:
return ret;
}
-
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
{
@@ -363,27 +348,18 @@ afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
int32_t
afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int child_index = 0;
int call_count = -1;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- child_index = (long) cookie;
-
- if (op_ret == 1) {
- }
-
- if (op_ret == 0) {
- __mark_pre_op_undone_on_fd (frame, this, child_index);
- }
-
LOCK (&frame->lock);
{
call_count = --local->call_count;
@@ -391,6 +367,11 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&frame->lock);
if (call_count == 0) {
+ if (local->transaction.resume_stub) {
+ call_resume (local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
+
if (afr_lock_server_count (priv, local->transaction.type) == 0) {
local->transaction.done (frame, this);
} else {
@@ -404,63 +385,266 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
void
-afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_transaction_type type)
+afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, afr_transaction_type type)
{
- int curr_read_child = -1;
- int new_read_child = -1;
+ int i = -1;
+ int count = 0;
+ int read_child = -1;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int **pending = NULL;
int idx = 0;
+ int32_t *stale_children = NULL;
int32_t *fresh_children = NULL;
- size_t success_count = 0;
+ gf_boolean_t rm_stale_children = _gf_false;
idx = afr_index_for_transaction_type (type);
priv = this->private;
local = frame->local;
- curr_read_child = afr_inode_get_read_ctx (this, inode, NULL);
pending = local->pending;
- GF_ASSERT (curr_read_child >= 0);
-
- if (pending[curr_read_child][idx] != 0)
+ if (local->op_ret < 0)
+ goto out;
+ fresh_children = local->fresh_children;
+ read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
+ if (read_child < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Possible split-brain "
+ "for %s", uuid_utoa (inode->gfid));
goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_is_child_present (fresh_children,
+ priv->child_count, i))
+ continue;
+ if (pending[i][idx])
+ continue;
+ /* child is down or op failed on it */
+ if (!stale_children)
+ stale_children = afr_children_create (priv->child_count);
+ if (!stale_children)
+ goto out;
- fresh_children = afr_fresh_children_create (priv->child_count);
- if (!fresh_children)
+ rm_stale_children = _gf_true;
+ stale_children[count++] = i;
+ gf_log (this->name, GF_LOG_DEBUG, "Removing stale child "
+ "%d for %s", i, uuid_utoa (inode->gfid));
+ }
+
+ if (!rm_stale_children)
goto out;
- for (new_read_child = 0; new_read_child < priv->child_count;
- new_read_child++) {
+ afr_inode_rm_stale_children (this, inode, stale_children);
+out:
+ GF_FREE (stale_children);
+ return;
+}
- if (!priv->child_up[new_read_child])
- /* child is down */
- continue;
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
+{
+ afr_inodelk_t *inodelk = NULL;
+ int i = 0;
- if (pending[new_read_child][idx] == 0)
- /* op just failed */
- continue;
- fresh_children[success_count] = new_read_child;
- success_count++;
+ for (i = 0; int_lock->inodelk[i].domain; i++) {
+ inodelk = &int_lock->inodelk[i];
+ if (strcmp (dom, inodelk->domain) == 0)
+ return inodelk;
+ }
+ return NULL;
+}
+
+/*
+ * Return the locked_nodes array that applies to a transaction of @type.
+ *
+ * Data and metadata transactions use the inodelk registered for
+ * int_lock->domain; entry and rename transactions use the first lockee.
+ * Returns NULL when @type matches no case or when no inodelk exists for
+ * the domain (afr_get_inodelk() returns NULL in that case).
+ */
+unsigned char*
+afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
+{
+        unsigned char *locked_nodes = NULL;
+        afr_inodelk_t *inodelk      = NULL;
+
+        switch (type) {
+        case AFR_DATA_TRANSACTION:
+        case AFR_METADATA_TRANSACTION:
+                inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+                /* no matching domain: report "no locked nodes" rather
+                 * than dereferencing NULL */
+                if (inodelk)
+                        locked_nodes = inodelk->locked_nodes;
+                break;
+
+        case AFR_ENTRY_TRANSACTION:
+        case AFR_ENTRY_RENAME_TRANSACTION:
+                /*Because same set of subvols participate in all lockee
+                 * entities*/
+                locked_nodes = int_lock->lockee[0].locked_nodes;
+                break;
+        }
+        return locked_nodes;
+}
+
+/*
+ * Number of pre-op calls to expect: the count of locked children for
+ * this transaction, doubled for rename transactions.
+ */
+int
+afr_changelog_pre_op_call_count (afr_transaction_type type,
+                                 afr_internal_lock_t *int_lock,
+                                 unsigned int child_count)
+{
+        unsigned char *nodes = NULL;
+        int            count = 0;
+
+        nodes = afr_locked_nodes_get (type, int_lock);
+        GF_ASSERT (nodes);
+
+        count = afr_locked_children_count (nodes, child_count);
+
+        return (type == AFR_ENTRY_RENAME_TRANSACTION) ? (count * 2) : count;
+}
+
+/*
+ * Number of post-op calls to expect: the count of children on which the
+ * pre-op was done, doubled for rename transactions.
+ */
+int
+afr_changelog_post_op_call_count (afr_transaction_type type,
+                                  unsigned char *pre_op,
+                                  unsigned int child_count)
+{
+        int count = afr_pre_op_done_children_count (pre_op, child_count);
+
+        return (type == AFR_ENTRY_RENAME_TRANSACTION) ? (count * 2) : count;
+}
+
+/*
+ * Fill local->transaction.txn_changelog for the current transaction
+ * type: for every child, the entry is local->pending's value (converted
+ * with ntoh32) plus one, stored back with hton32.
+ */
+void
+afr_compute_txn_changelog (afr_local_t *local, afr_private_t *priv)
+{
+        int32_t **txn   = local->transaction.txn_changelog;
+        int32_t   undo  = 0;
+        int       slot  = 0;
+        int       child = 0;
+
+        slot = afr_index_for_transaction_type (local->transaction.type);
+
+        for (child = 0; child < priv->child_count; child++) {
+                undo = ntoh32 (local->pending[child][slot]);
+                /* + 1 accounts for the pre-op increment */
+                txn[child][slot] = hton32 (undo + 1);
+        }
+}
+
+/*
+ * Pick the xattrop ordering for a post-op on @child: LOCAL_FIRST when
+ * @optimized is set and the child's pending count for @type is zero,
+ * LOCAL_LAST otherwise.
+ */
+afr_xattrop_type_t
+afr_get_postop_xattrop_type (int32_t **pending, int optimized, int child,
+                             afr_transaction_type type)
+{
+        int slot = afr_index_for_transaction_type (type);
+
+        if (optimized && !pending[child][slot])
+                return LOCAL_FIRST;
+
+        return LOCAL_LAST;
+}
+
+/*
+ * Populate @xattr with the post-op changelog for @child.
+ *
+ * With @optimized set the values come from
+ * local->transaction.txn_changelog, otherwise from local->pending.  The
+ * key ordering is chosen by afr_get_postop_xattrop_type().  A failure to
+ * set the dict is only logged.
+ */
+void
+afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,
+                     int optimized, int child)
+{
+        afr_private_t       *priv   = this->private;
+        int32_t            **source = NULL;
+        afr_xattrop_type_t   order  = LOCAL_LAST;
+
+        order = afr_get_postop_xattrop_type (local->pending, optimized, child,
+                                             local->transaction.type);
+
+        source = optimized ? local->transaction.txn_changelog
+                           : local->pending;
+
+        if (afr_set_pending_dict (priv, xattr, source, child, order) < 0)
+                gf_log (this->name, GF_LOG_INFO,
+                        "failed to set pending entry");
+}
+
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int index = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ index = afr_index_for_transaction_type (local->transaction.type);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->pending[i][index] == 0)
+ return _gf_false;
}
- afr_inode_set_read_ctx (this, inode, fresh_children[0],
- fresh_children);
+ return _gf_true;
+}
+
+static void
+afr_dir_fop_handle_all_fop_failures (call_frame_t *frame)
+{
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ this = frame->this;
+ local = frame->local;
+ priv = this->private;
+
+ if ((local->transaction.type != AFR_ENTRY_TRANSACTION) &&
+ (local->transaction.type != AFR_ENTRY_RENAME_TRANSACTION))
+ return;
+
+ if (local->op_ret >= 0)
+ goto out;
+
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
out:
- if (fresh_children)
- GF_FREE (fresh_children);
return;
}
+/*
+ * For data transactions only: if every child on which the pre-op was
+ * done reported EDQUOT, mark the whole pending matrix as success.
+ */
+static void
+afr_data_handle_quota_errors (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+
+        if (local->transaction.type != AFR_DATA_TRANSACTION)
+                return;
+
+        /*
+         * The idea is to avoid leaving the file in a FOOL-FOOL state when
+         * the data transaction failed with EDQUOT on all the bricks, so
+         * that the system is not loaded with unnecessary self-heals.
+         */
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->transaction.pre_op[child])
+                        continue;
+                /* a participant with any other outcome: nothing to do */
+                if (local->child_errno[child] != EDQUOT)
+                        return;
+        }
+
+        __mark_all_success (local->pending, priv->child_count,
+                            local->transaction.type);
+}
int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
{
afr_private_t * priv = this->private;
afr_internal_lock_t *int_lock = NULL;
- int ret = 0;
int i = 0;
int call_count = 0;
@@ -468,32 +652,32 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
afr_fd_ctx_t *fdctx = NULL;
dict_t **xattr = NULL;
int piggyback = 0;
- int index = 0;
int nothing_failed = 1;
local = frame->local;
int_lock = &local->internal_lock;
- __mark_down_children (local->pending, priv->child_count,
- local->child_up, local->transaction.type);
+ __mark_non_participant_children (local->pending, priv->child_count,
+ local->transaction.pre_op,
+ local->transaction.type);
+
+ afr_data_handle_quota_errors (frame, this);
+ afr_dir_fop_handle_all_fop_failures (frame);
if (local->fd)
- afr_update_read_child (frame, this, local->fd->inode,
- local->transaction.type);
+ afr_transaction_rm_stale_children (frame, this,
+ local->fd->inode,
+ local->transaction.type);
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
for (i = 0; i < priv->child_count; i++) {
- xattr[i] = get_new_dict ();
- dict_ref (xattr[i]);
- }
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
+ xattr[i] = dict_new ();
}
+ call_count = afr_changelog_post_op_call_count (local->transaction.type,
+ local->transaction.pre_op,
+ priv->child_count);
local->call_count = call_count;
if (local->fd)
@@ -501,75 +685,52 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
if (call_count == 0) {
/* no child is up */
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
-
int_lock->lock_cbk = local->transaction.done;
afr_unlock (frame, this);
- return 0;
+ goto out;
}
- /* check if something has failed, to handle piggybacking */
- nothing_failed = 1;
- index = afr_index_for_transaction_type (local->transaction.type);
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending[i][index] == 0) {
- nothing_failed = 0;
- break;
- }
- }
+ nothing_failed = afr_txn_nothing_failed (frame, this);
- index = afr_index_for_transaction_type (local->transaction.type);
- if (local->optimistic_change_log &&
- local->transaction.type != AFR_DATA_TRANSACTION) {
- /* if nothing_failed, then local->pending[..] == {0 .. 0} */
- for (i = 0; i < priv->child_count; i++)
- local->pending[i][index]++;
- }
+ afr_compute_txn_changelog (local , priv);
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
+ if (!local->transaction.pre_op[i])
continue;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
-
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ afr_set_postop_dict (local, this, xattr[i],
+ local->optimistic_change_log, i);
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
{
if (!fdctx) {
+ afr_set_postop_dict (local, this, xattr[i],
+ 0, i);
STACK_WIND (frame, afr_changelog_post_op_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
&local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
break;
}
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_piggyback[i]) {
- fdctx->pre_op_piggyback[i]--;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
+ /* local->transaction.postop_piggybacked[] was
+ precomputed in is_piggyback_postop() when called from
+ afr_changelog_post_op_safe()
+ */
- if (piggyback && !nothing_failed)
- ret = afr_set_piggyback_dict (priv, xattr[i],
- local->pending,
- local->transaction.type);
+ piggyback = 0;
+ if (local->transaction.postop_piggybacked[i])
+ piggyback = 1;
+
+ afr_set_postop_dict (local, this, xattr[i],
+ piggyback, i);
if (nothing_failed && piggyback) {
afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i], NULL);
} else {
STACK_WIND_COOKIE (frame,
afr_changelog_post_op_cbk,
@@ -577,15 +738,17 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
}
break;
case AFR_METADATA_TRANSACTION:
{
- if (nothing_failed) {
+ if (nothing_failed && local->optimistic_change_log) {
afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
break;
}
@@ -594,28 +757,32 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
else
STACK_WIND (frame, afr_changelog_post_op_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
&local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
case AFR_ENTRY_RENAME_TRANSACTION:
{
- if (nothing_failed) {
+ if (nothing_failed && local->optimistic_change_log) {
afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
} else {
STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->xattrop,
&local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
call_count--;
}
@@ -628,20 +795,17 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
value
*/
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
+ afr_set_postop_dict (local, this, xattr[i],
+ local->optimistic_change_log, i);
/* fall through */
case AFR_ENTRY_TRANSACTION:
{
- if (nothing_failed) {
+ if (nothing_failed && local->optimistic_change_log) {
afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
break;
}
@@ -650,13 +814,15 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
else
STACK_WIND (frame, afr_changelog_post_op_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
&local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
}
@@ -665,6 +831,7 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
break;
}
+out:
for (i = 0; i < priv->child_count; i++) {
dict_unref (xattr[i]);
}
@@ -675,30 +842,27 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
int32_t
afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = this->private;
- loc_t * loc = NULL;
int call_count = -1;
int child_index = (long) cookie;
local = frame->local;
- loc = &local->loc;
LOCK (&frame->lock);
{
- if (op_ret == 1) {
- /* special op_ret for piggyback */
- }
-
- if (op_ret == 0) {
+ switch (op_ret) {
+ case 0:
__mark_pre_op_done_on_fd (frame, this, child_index);
- }
-
- if (op_ret == -1) {
- local->child_up[child_index] = 0;
-
+ //fallthrough we need to mark the pre_op
+ case 1:
+ local->transaction.pre_op[child_index] = 1;
+ /* special op_ret for piggyback */
+ break;
+ case -1:
if (op_errno == ENOTSUP) {
gf_log (this->name, GF_LOG_ERROR,
"xattrop not supported by %s",
@@ -712,6 +876,7 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
strerror (op_errno));
}
local->op_errno = op_errno;
+ break;
}
call_count = --local->call_count;
@@ -723,19 +888,13 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(local->op_errno == ENOTSUP)) {
local->transaction.resume (frame, this);
} else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
-
- afr_pid_restore (frame);
-
- local->transaction.fop (frame, this);
+ afr_transaction_perform_fop (frame, this);
}
}
return 0;
}
-
int
afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
@@ -747,34 +906,27 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
afr_fd_ctx_t *fdctx = NULL;
afr_local_t *local = NULL;
int piggyback = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ unsigned char *locked_nodes = NULL;
local = frame->local;
+ int_lock = &local->internal_lock;
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
for (i = 0; i < priv->child_count; i++) {
- xattr[i] = get_new_dict ();
- dict_ref (xattr[i]);
- }
-
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
+ xattr[i] = dict_new ();
}
+ call_count = afr_changelog_pre_op_call_count (local->transaction.type,
+ int_lock,
+ priv->child_count);
if (call_count == 0) {
- /* no child is up */
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
-
local->internal_lock.lock_cbk =
local->transaction.done;
afr_unlock (frame, this);
- return 0;
+ goto out;
}
local->call_count = call_count;
@@ -785,11 +937,12 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
if (local->fd)
fdctx = afr_fd_ctx_get (local->fd, this);
+ locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
+ if (!locked_nodes[i])
continue;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
+ ret = afr_set_pending_dict (priv, xattr[i], local->pending,
+ i, LOCAL_FIRST);
if (ret < 0)
gf_log (this->name, GF_LOG_INFO,
@@ -806,7 +959,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->xattrop,
&(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
break;
}
@@ -823,9 +977,12 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
}
UNLOCK (&local->fd->lock);
+ afr_set_delayed_post_op (frame, this);
+
if (piggyback)
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
else
STACK_WIND_COOKIE (frame,
afr_changelog_pre_op_cbk,
@@ -833,14 +990,16 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
case AFR_METADATA_TRANSACTION:
{
if (local->optimistic_change_log) {
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
break;
}
@@ -851,7 +1010,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
else
STACK_WIND_COOKIE (frame,
afr_changelog_pre_op_cbk,
@@ -859,7 +1019,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->xattrop,
&(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
@@ -867,7 +1028,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
if (local->optimistic_change_log) {
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
} else {
STACK_WIND_COOKIE (frame,
afr_changelog_pre_op_cbk,
@@ -875,7 +1037,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->xattrop,
&local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
call_count--;
@@ -890,8 +1053,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
value
*/
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
+ ret = afr_set_pending_dict (priv, xattr[i], local->pending,
+ i, LOCAL_FIRST);
if (ret < 0)
gf_log (this->name, GF_LOG_INFO,
@@ -903,7 +1066,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
if (local->optimistic_change_log) {
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
+ this, 1, 0, xattr[i],
+ NULL);
break;
}
@@ -914,7 +1078,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
else
STACK_WIND_COOKIE (frame,
afr_changelog_pre_op_cbk,
@@ -922,7 +1087,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->xattrop,
&local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
break;
}
@@ -930,7 +1096,7 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
if (!--call_count)
break;
}
-
+out:
for (i = 0; i < priv->child_count; i++) {
dict_unref (xattr[i]);
}
@@ -1084,12 +1250,14 @@ int
afr_set_transaction_flock (afr_local_t *local)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- int_lock->lk_flock.l_len = local->transaction.len;
- int_lock->lk_flock.l_start = local->transaction.start;
- int_lock->lk_flock.l_type = F_WRLCK;
+ inodelk->flock.l_len = local->transaction.len;
+ inodelk->flock.l_start = local->transaction.start;
+ inodelk->flock.l_type = F_WRLCK;
return 0;
}
@@ -1104,6 +1272,7 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->domain = this->name;
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
@@ -1117,8 +1286,8 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
case AFR_ENTRY_RENAME_TRANSACTION:
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
break;
case AFR_ENTRY_TRANSACTION:
@@ -1140,12 +1309,6 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int
afr_lock (call_frame_t *frame, xlator_t *this)
{
- afr_pid_save (frame);
-
- frame->root->pid = (long) frame->root;
-
- afr_set_lk_owner (frame, this);
-
afr_set_lock_number (frame, this);
return afr_lock_rec (frame, this);
@@ -1157,28 +1320,463 @@ afr_lock (call_frame_t *frame, xlator_t *this)
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ if (__fop_changelog_needed (frame, this)) {
+ afr_changelog_pre_op (frame, this);
+ } else {
+ afr_transaction_perform_fop (frame, this);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Flag the transaction held in frame->local so that its changelog post-op
+ * may be delayed (local->delayed_post_op = _gf_true).
+ *
+ * The flag is set only when all of the following hold: the xlator has a
+ * private structure with a non-zero post_op_delay_secs, frame->local exists,
+ * eager locking is on for this transaction, the transaction has an fd and
+ * the fop is GF_FOP_WRITE. In every other case nothing is modified.
+ */
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t    *local = NULL;
+        afr_private_t  *priv  = NULL;
+
+        /* call this function when any of the related optimizations
+           which benefit from delaying post op is enabled, namely:
+
+           - changelog piggybacking
+           - eager locking
+        */
+
+        priv = this->private;
+        if (!priv)
+                return;
+
+        if (!priv->post_op_delay_secs)
+                return;
+
+        /* local must be validated before its first dereference */
+        local = frame->local;
+        if (!local)
+                return;
+
+        if (!local->transaction.eager_lock_on)
+                return;
+
+        if (!local->fd)
+                return;
+
+        if (local->op == GF_FOP_WRITE)
+                local->delayed_post_op = _gf_true;
+}
+
+/*
+ * Tell whether more than one fd is open on @inode.
+ *
+ * Returns _gf_true when the afr inode context records an open_fd_count
+ * greater than one, and also when @inode is NULL or no inode context can
+ * be obtained; returns _gf_false otherwise.
+ */
+gf_boolean_t
+afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this)
+{
+        afr_inode_ctx_t *inode_ctx = NULL;
+
+        if (inode == NULL) {
+                /* Answering "false" here could let the caller keep taking
+                 * eager-lock, which may lead to starvation; answer "true".
+                 */
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid inode");
+                return _gf_true;
+        }
+
+        /* Suppose mount1 holds the eager-lock (a full lock) and mount2 then
+         * opens the same file: mount2 cannot perform any data operation
+         * until mount1 releases the eager-lock. To avoid that scenario
+         * eager-lock is not enabled for a transaction when open-fd-count
+         * is > 1.
+         */
+        inode_ctx = afr_inode_ctx_get (inode, this);
+        if (inode_ctx == NULL || inode_ctx->open_fd_count > 1)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Returns _gf_false when local->success_count equals priv->child_count,
+ * and _gf_true for any other count.
+ */
+gf_boolean_t
+afr_any_fops_failed (afr_local_t *local, afr_private_t *priv)
+{
+        return (local->success_count == priv->child_count) ? _gf_false
+                                                           : _gf_true;
+}
+
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
+ afr_private_t *priv = NULL;
priv = this->private;
+
local = frame->local;
+ if (!local)
+ goto out;
- if (__changelog_needed_pre_op (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ if (!local->delayed_post_op)
+ goto out;
+
+ //Mark pending changelog ASAP
+ if (afr_any_fops_failed (local, priv))
+ goto out;
+
+ if (local->fd && afr_are_multiple_fds_opened (local->fd->inode, this))
+ goto out;
+
+ res = _gf_true;
+out:
+ return res;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub);
+
+/*
+ * Callback taking an opaque pointer: @data is treated as an fd_t and the
+ * delayed changelog on it is woken up using the current xlator (THIS).
+ */
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
+{
+        afr_delayed_changelog_wake_up (THIS, (fd_t *) data);
+}
+
+
+/*
+ * Check if the frame is destined to get optimized away
+ * with changelog piggybacking.
+ *
+ * For every child on which this transaction recorded a pre-op, either
+ * decrement the fd context's pre_op_piggyback[] counter and mark
+ * local->transaction.postop_piggybacked[], or, when that counter is zero,
+ * decrement pre_op_done[] and remember that a hard post-op is needed.
+ *
+ * Returns _gf_true only if every such child could piggyback and
+ * afr_txn_nothing_failed() holds. Returns _gf_false otherwise, including
+ * when @fd has no afr fd context.
+ */
+static gf_boolean_t
+is_piggyback_post_op (call_frame_t *frame, fd_t *fd)
+{
+        afr_fd_ctx_t   *fdctx     = NULL;
+        afr_local_t    *local     = NULL;
+        gf_boolean_t    piggyback = _gf_true;
+        afr_private_t  *priv      = NULL;
+        int             i         = 0;
+
+        priv = frame->this->private;
+        local = frame->local;
+
+        fdctx = afr_fd_ctx_get (fd, frame->this);
+        if (!fdctx) {
+                /* No counters to piggyback on: the caller has to perform
+                   a hard post-op.
+                */
+                return _gf_false;
+        }
+
+        LOCK (&fd->lock);
+        {
+                for (i = 0; i < priv->child_count; i++) {
+                        if (!local->transaction.pre_op[i])
+                                continue;
+                        if (fdctx->pre_op_piggyback[i]) {
+                                fdctx->pre_op_piggyback[i]--;
+                                local->transaction.postop_piggybacked[i] = 1;
+                        } else {
+                                /* For at least _one_ subvolume we cannot
+                                   piggyback on the changelog, and have to
+                                   perform a hard POST-OP and therefore fsync
+                                   if necessary
+                                */
+                                piggyback = _gf_false;
+                                GF_ASSERT (fdctx->pre_op_done[i]);
+                                fdctx->pre_op_done[i]--;
+                        }
+                }
+        }
+        UNLOCK (&fd->lock);
+
+        if (!afr_txn_nothing_failed (frame, frame->this)) {
+                /* something failed in this transaction,
+                   we will be performing a hard post-op
+                */
+                return _gf_false;
+        }
+
+        return piggyback;
+}
+
+
+/* SET operation
+ *
+ * Record in the afr fd context of @fd that an unstable write was seen
+ * (witnessed_unstable_write = _gf_true), under fd->lock.
+ *
+ * Always returns 0. When @fd has no afr fd context there is nowhere to
+ * record the flag and the call is a no-op.
+ */
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
+{
+        afr_fd_ctx_t *fdctx = NULL;
+
+        fdctx = afr_fd_ctx_get (fd, this);
+        if (!fdctx)
+                return 0;
+
+        LOCK (&fd->lock);
+        {
+                fdctx->witnessed_unstable_write = _gf_true;
+        }
+        UNLOCK (&fd->lock);
+
+        return 0;
+}
+
+/* TEST and CLEAR operation
+ *
+ * Returns _gf_true if the afr fd context of @fd has its
+ * witnessed_unstable_write flag set, clearing the flag in the same
+ * critical section (fd->lock). Also returns _gf_true when @fd has no afr
+ * fd context. Returns _gf_false when the flag was not set.
+ */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
+{
+        afr_fd_ctx_t   *ctx  = afr_fd_ctx_get (fd, this);
+        gf_boolean_t    seen = _gf_false;
+
+        if (ctx == NULL)
+                return _gf_true;
+
+        LOCK (&fd->lock);
+        {
+                seen = ctx->witnessed_unstable_write ? _gf_true : _gf_false;
+                if (seen)
+                        ctx->witnessed_unstable_write = _gf_false;
+        }
+        UNLOCK (&fd->lock);
+
+        return seen;
+}
+
+
+int
+afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
- afr_pid_restore (frame);
+ priv = this->private;
+ local = frame->local;
- local->transaction.fop (frame, this);
+ if (afr_fop_failed (op_ret, op_errno)) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa (local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ gf_fop_list[local->op]);
+
+ afr_transaction_fop_failed (frame, this, child_index);
}
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ afr_changelog_post_op_now (frame, this);
+
return 0;
}
int
+afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ xdata = dict_new();
+ if (xdata)
+ ret = dict_set_int32 (xdata, "batch-fsync", 1);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync, local->fd,
+ 1, xdata);
+ if (!--call_count)
+ break;
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+/*
+ * Issue the changelog post-op for @frame, preceded by an fsync when one is
+ * required to keep the changelog trustworthy. Always returns 0.
+ */
+int
+afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t    *local = frame->local;
+        afr_private_t  *priv  = this->private;
+
+        /* Why an fsync may be needed before the post-op:
+
+           afr_changelog_post_op_now() results in issuing ->[f]xattrop().
+           Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+           responsible operation than it might appear on the surface.
+
+           The changelog of a file (kept in an xattr of the file on the
+           server) stores information (pending count) about the state of
+           the file on the OTHER server. That changelog is blindly trusted
+           and must therefore be updated so that it stays trustworthy:
+           decrementing the pending count (essentially "clearing the dirty
+           flag") must happen STRICTLY after we are sure that the operation
+           on the other server has reached stable storage.
+
+           The backend filesystem on that server will eventually flush it
+           to stable storage, but we (being in userspace) have no mechanism
+           to get notified when the write became "stable". So we need to
+           take matters into our own hands: issue an fsync() EVEN IF THE
+           APPLICATION WAS PERFORMING UNSTABLE WRITES, and wait for its
+           acknowledgement before initiating the hard POST-OP.
+
+           However, if the FD itself was opened with O_SYNC or O_DSYNC we
+           are already guaranteed that the writes were made stable as part
+           of the FOP itself. The same holds true for NFS stable writes,
+           which happen on an anonymous FD with the O_DSYNC or O_SYNC flag
+           set in the writev() @flags param. For all other write types a
+           flag is marked in the fdctx whenever an unstable write is
+           witnessed.
+
+           The conditions below are evaluated strictly left to right; the
+           fsync is issued only when all of them hold:
+             - this is a data transaction on an fd,
+             - the post-op is not about to be optimized away because a new
+               write() already piggybacked on this frame's changelog,
+             - an unstable write was witnessed on the fd,
+             - the user asked for durability.
+        */
+        if (local->fd &&
+            local->transaction.type == AFR_DATA_TRANSACTION &&
+            !is_piggyback_post_op (frame, local->fd) &&
+            afr_fd_has_witnessed_unstable_write (this, local->fd) &&
+            priv->ensure_durability) {
+                /* Time to fsync() */
+                afr_changelog_fsync (frame, this);
+        } else {
+                afr_changelog_post_op_now (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Replace the frame parked in the fd context of @fd (delay_frame) with
+ * @frame, which may be NULL, and cancel any timer armed for it. When
+ * @frame is non-NULL a new timer of post_op_delay_secs seconds is armed
+ * with afr_delayed_changelog_wake_up_cbk() as its callback.
+ *
+ * The previously parked frame, if any, gets @stub stored as its
+ * transaction.resume_stub and has its post-op issued through
+ * afr_changelog_post_op_safe(). If nothing was parked (or @fd has no afr
+ * fd context) a non-NULL @stub is resumed directly.
+ */
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+                               call_stub_t *stub)
+{
+        afr_private_t   *priv         = this->private;
+        afr_fd_ctx_t    *fd_ctx       = NULL;
+        call_frame_t    *parked       = NULL;
+        afr_local_t     *parked_local = NULL;
+        struct timespec  delay        = {0, };
+
+        fd_ctx = afr_fd_ctx_get (fd, this);
+        if (fd_ctx) {
+                delay.tv_sec = priv->post_op_delay_secs;
+                delay.tv_nsec = 0;
+
+                pthread_mutex_lock (&fd_ctx->delay_lock);
+                {
+                        /* take over whatever was parked and disarm it */
+                        parked = fd_ctx->delay_frame;
+                        fd_ctx->delay_frame = NULL;
+                        if (fd_ctx->delay_timer)
+                                gf_timer_call_cancel (this->ctx,
+                                                      fd_ctx->delay_timer);
+                        fd_ctx->delay_timer = NULL;
+
+                        /* park the new frame, when there is one */
+                        if (frame) {
+                                fd_ctx->delay_timer =
+                                        gf_timer_call_after (this->ctx, delay,
+                                             afr_delayed_changelog_wake_up_cbk,
+                                             fd);
+                                fd_ctx->delay_frame = frame;
+                        }
+                }
+                pthread_mutex_unlock (&fd_ctx->delay_lock);
+        }
+
+        if (parked) {
+                parked_local = parked->local;
+                parked_local->transaction.resume_stub = stub;
+                afr_changelog_post_op_safe (parked, this);
+                return;
+        }
+
+        if (stub)
+                call_resume (stub);
+}
+
+
+/*
+ * Entry point for the changelog post-op of a transaction: park the frame
+ * for a delayed post-op when is_afr_delayed_changelog_post_op_needed()
+ * says so, otherwise issue the post-op through
+ * afr_changelog_post_op_safe().
+ */
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        if (!is_afr_delayed_changelog_post_op_needed (frame, this)) {
+                afr_changelog_post_op_safe (frame, this);
+                return;
+        }
+
+        afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
+}
+
+
+
+/* Wake up the post-op that is sleeping (delayed) on @fd and register @stub
+   so that it is resumed once that transaction has completely finished.
+
+   The @stub gets saved in @local and gets resumed in
+   afr_local_cleanup()
+ */
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+        /* no new frame is parked in place of the one being woken up */
+        call_frame_t *no_frame = NULL;
+
+        afr_delayed_changelog_post_op (this, no_frame, fd, stub);
+}
+
+
+/*
+ * Wake up the delayed post-op parked on @fd, if any, without parking a
+ * new frame and without registering a stub.
+ */
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+        call_frame_t *no_frame = NULL;
+        call_stub_t  *no_stub  = NULL;
+
+        afr_delayed_changelog_post_op (this, no_frame, fd, no_stub);
+}
+
+
+ int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
@@ -1189,7 +1787,20 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- if (__changelog_needed_post_op (frame, this)) {
+ if (local->transaction.eager_lock_on) {
+ /* We don't need to retain "local" in the
+ fd list anymore, writes to all subvols
+ are finished by now */
+ LOCK (&local->fd->lock);
+ {
+ list_del_init (&local->transaction.eager_locked);
+ }
+ UNLOCK (&local->fd->lock);
+ }
+
+ afr_restore_lk_owner (frame);
+
+ if (__fop_changelog_needed (frame, this)) {
afr_changelog_post_op (frame, this);
} else {
if (afr_lock_server_count (priv, local->transaction.type) == 0) {
@@ -1209,7 +1820,8 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
+ int child_index)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -1218,7 +1830,89 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index
priv = this->private;
__mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
+ child_index, local->transaction.type);
+}
+
+
+
+/*
+ * Tell whether the byte ranges of two transactions intersect.
+ *
+ * Each range starts at transaction.start and spans transaction.len bytes;
+ * a len of 0 makes the range extend up to ULLONG_MAX. Both ends are
+ * inclusive.
+ */
+static gf_boolean_t
+afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
+{
+        uint64_t first1 = local1->transaction.start;
+        uint64_t first2 = local2->transaction.start;
+        uint64_t last1  = local1->transaction.len
+                          ? first1 + local1->transaction.len - 1
+                          : ULLONG_MAX;
+        uint64_t last2  = local2->transaction.len
+                          ? first2 + local2->transaction.len - 1
+                          : ULLONG_MAX;
+
+        /* disjoint iff one range ends before the other begins */
+        return !((last1 < first2) || (last2 < first1));
+}
+
+/*
+ * Decide whether the transaction in @local runs with eager-lock and, if
+ * so, add it to the fd context's eager_locked list.
+ *
+ * Nothing is changed unless the transaction has an fd, is a data
+ * transaction, eager-lock is enabled in the xlator, the fd has an afr fd
+ * context and afr_are_multiple_fds_opened() returns false for the inode.
+ * Past those checks, local->transaction.eager_lock_on is set to _gf_false
+ * when the transaction overlaps one already on the list, and to _gf_true
+ * (with @local appended to the list) otherwise.
+ */
+void
+afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
+{
+        afr_private_t  *priv     = this->private;
+        afr_fd_ctx_t   *fdctx    = NULL;
+        afr_local_t    *other    = NULL;
+        gf_boolean_t    overlaps = _gf_false;
+
+        if (!local->fd ||
+            local->transaction.type != AFR_DATA_TRANSACTION ||
+            !priv->eager_lock)
+                return;
+
+        fdctx = afr_fd_ctx_get (local->fd, this);
+        if (!fdctx)
+                return;
+
+        if (afr_are_multiple_fds_opened (local->fd->inode, this))
+                return;
+
+        /*
+         * Once the full file lock has been acquired in the eager-lock
+         * phase, overlapping writes no longer compete for inode-locks;
+         * the lock is instead transferred to the next writes. Overlapping
+         * writes are therefore not ordered, which can cause
+         * inconsistencies in replication.
+         * Example:
+         * Two overlapping writes w1, w2 are sent in parallel on the same
+         * fd in two threads t1, t2.
+         * Both threads can execute afr_writev_wind in the following manner.
+         * t1 winds w1 on brick-0
+         * t2 winds w2 on brick-0
+         * t2 winds w2 on brick-1
+         * t1 winds w1 on brick-1
+         *
+         * The check below makes sure locks are not transferred for
+         * overlapping writes.
+         */
+        LOCK (&local->fd->lock);
+        {
+                list_for_each_entry (other, &fdctx->eager_locked,
+                                     transaction.eager_locked) {
+                        if (afr_locals_overlap (other, local)) {
+                                overlaps = _gf_true;
+                                break;
+                        }
+                }
+
+                if (overlaps) {
+                        local->transaction.eager_lock_on = _gf_false;
+                } else {
+                        local->transaction.eager_lock_on = _gf_true;
+                        list_add_tail (&local->transaction.eager_locked,
+                                       &fdctx->eager_locked);
+                }
+        }
+        UNLOCK (&local->fd->lock);
 }
@@ -1227,20 +1921,43 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
+ fd_t *fd = NULL;
+ int ret = -1;
local = frame->local;
priv = this->private;
- afr_transaction_local_init (local, priv);
-
local->transaction.resume = afr_transaction_resume;
local->transaction.type = type;
+ ret = afr_transaction_local_init (local, this);
+ if (ret < 0)
+ goto out;
+
+ afr_transaction_eager_lock_init (local, this);
+
+ if (local->fd && local->transaction.eager_lock_on)
+ afr_set_lk_owner (frame, this, local->fd);
+ else
+ afr_set_lk_owner (frame, this, frame->root);
+
+ if (!local->transaction.eager_lock_on && local->loc.inode) {
+ fd = fd_lookup (local->loc.inode, frame->root->pid);
+ if (fd == NULL)
+ fd = fd_lookup_anonymous (local->loc.inode);
+
+ if (fd) {
+ afr_delayed_changelog_wake_up (this, fd);
+ fd_unref (fd);
+ }
+ }
+
if (afr_lock_server_count (priv, local->transaction.type) == 0) {
afr_internal_lock_finish (frame, this);
} else {
afr_lock (frame, this);
}
-
- return 0;
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 4b4428cc5..fa626fd0d 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -1,25 +1,21 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
+typedef enum { /* type of the 'op' argument of afr_set_pending_dict() */
+ LOCAL_FIRST = 1,
+ LOCAL_LAST = 2
+} afr_xattrop_type_t;
+
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
@@ -27,7 +23,29 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
+ int child, afr_xattrop_type_t op);
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success (int32_t *pending[], int child_count,
+ afr_transaction_type type);
+gf_boolean_t
+afr_any_fops_failed (afr_local_t *local, afr_private_t *priv);
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 851c57fb0..c724eb2ae 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -30,6 +21,11 @@
#endif
#include "afr-common.c"
+#define SHD_INODE_LRU_LIMIT 2048
+#define AFR_EH_HEALED_LIMIT 1024
+#define AFR_EH_HEAL_FAIL_LIMIT 1024
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+
struct volume_options options[];
int32_t
@@ -37,8 +33,13 @@ notify (xlator_t *this, int32_t event,
void *data, ...)
{
int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- ret = afr_notify (this, event, data);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
return ret;
}
@@ -62,284 +63,139 @@ mem_acct_init (xlator_t *this)
return ret;
}
+
int
-validate_options (xlator_t *this, char **op_errstr)
+xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
{
- int ret = 0;
- volume_opt_list_t *vol_opt = NULL;
- volume_opt_list_t *tmp;
+ int index = -1;
+ int i = 0;
+ xlator_list_t *list = NULL;
- if (!this) {
- gf_log (this->name, GF_LOG_DEBUG, "'this' not a valid ptr");
- ret =-1;
- goto out;
- }
+ list = this->children;
- if (list_empty (&this->volume_options))
- goto out;
-
- vol_opt = list_entry (this->volume_options.next,
- volume_opt_list_t, list);
- list_for_each_entry_safe (vol_opt, tmp, &this->volume_options, list) {
- ret = validate_xlator_volume_options_attacherr (this,
- vol_opt->given_opt,
- op_errstr);
+ while (list) {
+ if (subvol == list->xlator ||
+ strcmp (subvol->name, list->xlator->name) == 0) {
+ index = i;
+ break;
+ }
+ list = list->next;
+ i++;
}
-out:
-
- return ret;
+ return index;
}
+/*
+ * Reconcile priv->quorum_count with the quorum type string @qtype.
+ *
+ * A warning is logged when a non-zero quorum_count is set while @qtype is
+ * anything but "fixed". Then "none" resets quorum_count to 0 and "auto"
+ * sets it to AFR_QUORUM_AUTO; any other type leaves the count untouched.
+ */
+void
+fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
+{
+        if (strcmp (qtype, "fixed") != 0 && priv->quorum_count != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "quorum-type %s overriding quorum-count %u",
+                        qtype, priv->quorum_count);
+        }
+
+        if (strcmp (qtype, "none") == 0) {
+                priv->quorum_count = 0;
+                return;
+        }
+
+        if (strcmp (qtype, "auto") == 0)
+                priv->quorum_count = AFR_QUORUM_AUTO;
+}
int
reconfigure (xlator_t *this, dict_t *options)
{
-
- gf_boolean_t metadata_self_heal; /* on/off */
- gf_boolean_t entry_self_heal;
- gf_boolean_t data_self_heal;
- gf_boolean_t data_change_log; /* on/off */
- gf_boolean_t metadata_change_log; /* on/off */
- gf_boolean_t entry_change_log; /* on/off */
- gf_boolean_t strict_readdir;
-
- afr_private_t * priv = NULL;
- xlator_list_t * trav = NULL;
-
- char * read_subvol = NULL;
- char * self_heal = NULL;
- char * change_log = NULL;
- char * str_readdir = NULL;
- char * self_heal_algo = NULL;
-
- int32_t background_count = 0;
- int32_t window_size = 0;
-
- int read_ret = -1;
- int dict_ret = -1;
- int flag = 1;
- int ret = 0;
- int temp_ret = -1;
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
priv = this->private;
- dict_ret = dict_get_int32 (options, "background-self-heal-count",
- &background_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring background self-heal count to %d",
- background_count);
-
- priv->background_self_heal_count = background_count;
- }
+ GF_OPTION_RECONF ("background-self-heal-count",
+ priv->background_self_heal_count, options, uint32,
+ out);
- dict_ret = dict_get_str (options, "metadata-self-heal",
- &self_heal);
- if (dict_ret == 0) {
- temp_ret = gf_string2boolean (self_heal, &metadata_self_heal);
- if (temp_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Reconfiguration Invalid 'option metadata"
- "-self-heal %s'. Defaulting to old value.",
- self_heal);
- ret = -1;
- goto out;
- }
+ GF_OPTION_RECONF ("metadata-self-heal",
+ priv->metadata_self_heal, options, bool, out);
- priv->metadata_self_heal = metadata_self_heal;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option metadata"
- "-self-heal %s'.",
- self_heal);
- }
+ GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str,
+ out);
- dict_ret = dict_get_str (options, "data-self-heal", &self_heal);
- if (dict_ret == 0) {
- temp_ret = gf_string2boolean (self_heal, &data_self_heal);
- if (temp_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Reconfiguration Invalid 'option data"
- "-self-heal %s'. Defaulting to old value.",
- self_heal);
- ret = -1;
- goto out;
- }
+ GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
+ bool, out);
- priv->data_self_heal = data_self_heal;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option data"
- "-self-heal %s'.", self_heal);
- }
+ GF_OPTION_RECONF ("strict-readdir", priv->strict_readdir, options, bool,
+ out);
- dict_ret = dict_get_str (options, "entry-self-heal",
- &self_heal);
- if (dict_ret == 0) {
- temp_ret = gf_string2boolean (self_heal, &entry_self_heal);
- if (temp_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Reconfiguration Invalid 'option data"
- "-self-heal %s'. Defaulting to old value.",
- self_heal);
- ret = -1;
- goto out;
- }
+ GF_OPTION_RECONF ("data-self-heal-window-size",
+ priv->data_self_heal_window_size, options,
+ uint32, out);
- priv->entry_self_heal = entry_self_heal;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option entry"
- "-self-heal %s'.", self_heal);
- }
+ GF_OPTION_RECONF ("data-change-log", priv->data_change_log, options,
+ bool, out);
+ GF_OPTION_RECONF ("metadata-change-log",
+ priv->metadata_change_log, options, bool, out);
- dict_ret = dict_get_str (options, "strict-readdir",
- &str_readdir);
- if (dict_ret == 0) {
- temp_ret = gf_string2boolean (str_readdir, &strict_readdir);
- if (temp_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option strict-readdir %s'. "
- "Defaulting to old value.",
- str_readdir);
- ret = -1;
- goto out;
- }
+ GF_OPTION_RECONF ("entry-change-log", priv->entry_change_log, options,
+ bool, out);
- priv->strict_readdir = strict_readdir;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option strict"
- "-readdir %s'.", str_readdir);
- }
+ GF_OPTION_RECONF ("data-self-heal-algorithm",
+ priv->data_self_heal_algorithm, options, str, out);
- dict_ret = dict_get_int32 (options, "data-self-heal-window-size",
- &window_size);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring, Setting data self-heal window size to %d",
- window_size);
+ GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
- priv->data_self_heal_window_size = window_size;
- }
- else {
- priv->data_self_heal_window_size = 16;
- }
+ GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
+ options, uint32, out);
- dict_ret = dict_get_str (options, "data-change-log", &change_log);
- if (dict_ret == 0) {
- temp_ret = gf_string2boolean (change_log, &data_change_log);
- if (temp_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Reconfiguration Invalid 'option data-"
- "change-log %s'. Defaulting to old value.",
- change_log);
- ret = -1;
+ if (read_subvol) {
+ index = xlator_subvolume_index (this, read_subvol);
+ if (index == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ read_subvol->name);
goto out;
}
-
- priv->data_change_log = data_change_log;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option data-"
- "change-log %s'.", change_log);
+ priv->read_child = index;
}
- dict_ret = dict_get_str (options, "metadata-change-log",
- &change_log);
- if (dict_ret == 0) {
- temp_ret = gf_string2boolean (change_log,
- &metadata_change_log);
- if (temp_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-change-log %s'. "
- "Defaulting to metadata-change-log as 'off'.",
- change_log);
- ret = -1;
- goto out;
- }
-
- priv->metadata_change_log = metadata_change_log;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option metadata-"
- "change-log %s'.", change_log);
- }
+ GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
- dict_ret = dict_get_str (options, "entry-change-log",
- &change_log);
- if (dict_ret == 0) {
- temp_ret = gf_string2boolean (change_log, &entry_change_log);
- if (temp_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-change-log %s'. "
- "Defaulting to entry-change-log as 'on'.",
- change_log);
- ret = -1;
+ if (read_subvol_index >-1) {
+ index=read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ index);
goto out;
}
-
- priv->entry_change_log = entry_change_log;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option entry-"
- "change-log %s'.", change_log);
- }
-
- dict_ret = dict_get_str (options, "data-self-heal-algorithm",
- &self_heal_algo);
- if (dict_ret == 0) {
- /* Handling both strcmp cases - s1 > s2 and s1 < s2 */
-
- if (!strcmp (self_heal_algo, "full")) {
- priv->data_self_heal_algorithm = self_heal_algo;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option data-self"
- "heal-algorithm %s'.", self_heal_algo);
- goto next;
- }
-
- if (!strcmp (self_heal_algo, "diff")) {
- priv->data_self_heal_algorithm = self_heal_algo;
- gf_log (this->name, GF_LOG_DEBUG,
- "Reconfiguring 'option data-self"
- "heal-algorithm %s'.", self_heal_algo);
- goto next;
- }
-
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid self-heal algorithm %s,"
- "defaulting back to old value",
- self_heal_algo);
- ret = -1;
- goto out;
+ priv->read_child = index;
}
- read_ret = dict_get_str (options, "read-subvolume", &read_subvol);
+ GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
+ uint32, out);
+ fix_quorum_options(this,priv,qtype);
+ GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
+ int32, out);
- if (read_ret < 0)
- goto next;// No need to traverse, hence set the next option
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
- trav = this->children;
- flag = 0;
- while (trav) {
- if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Subvolume '%s' specified as read child.",
- trav->xlator->name);
-
- flag = 1;
- break;
- }
-
- trav = trav->next;
- }
+ GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
+ options, size, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF ("readdir-failover", priv->readdir_failover, options,
+ bool, out);
+ GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+ priv->did_discovery = _gf_false;
- if (flag == 0 ) {
- gf_log (this->name, GF_LOG_ERROR,
- "Invalid 'option read-subvolume %s', no such subvolume"
- , read_subvol);
- ret = -1;
- goto out;
- }
-
-next:
+ ret = 0;
out:
return ret;
@@ -354,39 +210,20 @@ static const char *favorite_child_warning_str = "You have specified subvolume '%
"subvolumes. All versions of the file except that on '%s' "
"WILL BE LOST.";
-static const char *no_lock_servers_warning_str = "You have set lock-server-count = 0. "
- "This means correctness is NO LONGER GUARANTEED in all cases. If two or more "
- "applications write to the same region of a file, there is a possibility that "
- "its copies will be INCONSISTENT. Set it to a value greater than 0 unless you "
- "are ABSOLUTELY SURE of what you are doing and WILL NOT HOLD GlusterFS "
- "RESPONSIBLE for inconsistent data. If you are in doubt, set it to a value "
- "greater than 0.";
int32_t
init (xlator_t *this)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- int op_errno = 0;
- char * read_subvol = NULL;
- char * fav_child = NULL;
- char * self_heal = NULL;
- char * algo = NULL;
- char * change_log = NULL;
- char * strict_readdir = NULL;
- char * inodelk_trace = NULL;
- char * entrylk_trace = NULL;
- char * def_val = NULL;
- int32_t background_count = 0;
- int32_t lock_server_count = 1;
- int32_t window_size = 0;
- int fav_ret = -1;
- int read_ret = -1;
- int dict_ret = -1;
-
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ xlator_t *fav_child = NULL;
+ char *qtype = NULL;
if (!this->children) {
gf_log (this->name, GF_LOG_ERROR,
@@ -400,288 +237,111 @@ init (xlator_t *this)
"Volume is dangling.");
}
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
priv = this->private;
+ LOCK_INIT (&priv->lock);
+ LOCK_INIT (&priv->read_child_lock);
+ //lock recovery is not done in afr
+ pthread_mutex_init (&priv->mutex, NULL);
+ INIT_LIST_HEAD (&priv->saved_fds);
- read_ret = dict_get_str (this->options, "read-subvolume", &read_subvol);
- priv->read_child = -1;
-
- fav_ret = dict_get_str (this->options, "favorite-child", &fav_child);
- priv->favorite_child = -1;
-
- priv->background_self_heal_count = 16;
-
- dict_ret = dict_get_int32 (this->options, "background-self-heal-count",
- &background_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting background self-heal count to %d",
- background_count);
-
- priv->background_self_heal_count = background_count;
- }
+ child_count = xlator_subvolume_count (this);
- /* Default values */
-
- priv->data_self_heal = 1;
- priv->metadata_self_heal = 1;
- priv->entry_self_heal = 1;
-
- dict_ret = dict_get_str (this->options, "data-self-heal", &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->data_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option data-self-heal %s'. "
- "Defaulting to data-self-heal as 'on'",
- self_heal);
- priv->data_self_heal = 1;
- }
- }
+ priv->child_count = child_count;
- if (xlator_get_volopt_info (&this->volume_options,
- "data-self-heal-algorithm", &def_val, NULL)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- " data-self-heal-algorithm not found");
- ret = -1;
- goto out;
- } else {
- priv->data_self_heal_algorithm = def_val;
- }
- dict_ret = dict_get_str (this->options, "data-self-heal-algorithm",
- &algo);
- if (dict_ret == 0) {
- priv->data_self_heal_algorithm = gf_strdup (algo);
- }
+ priv->read_child = -1;
- if (xlator_get_volopt_info (&this->volume_options,
- "data-self-heal-window-size",&def_val,
- NULL)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- "data-self-heal-window-size not found");
- ret = -1;
- goto out;
- } else {
- if (gf_string2int32 (def_val,
- (int *)&priv->data_self_heal_window_size)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- "data-self-heal-window-size corrupt");
- ret = -1;
+ GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
+ if (read_subvol) {
+ priv->read_child = xlator_subvolume_index (this, read_subvol);
+ if (priv->read_child == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ read_subvol->name);
goto out;
}
}
-
- dict_ret = dict_get_int32 (this->options, "data-self-heal-window-size",
- &window_size);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting data self-heal window size to %d",
- window_size);
-
- priv->data_self_heal_window_size = window_size;
- }
-
- dict_ret = dict_get_str (this->options, "metadata-self-heal",
- &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->metadata_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-self-heal %s'. "
- "Defaulting to metadata-self-heal as 'on'.",
- self_heal);
- priv->metadata_self_heal = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "entry-self-heal", &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->entry_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-self-heal %s'. "
- "Defaulting to entry-self-heal as 'on'.",
- self_heal);
- priv->entry_self_heal = 1;
- }
- }
-
- /* Change log options */
-
- priv->data_change_log = 1;
- priv->metadata_change_log = 1;
- priv->entry_change_log = 1;
- priv->optimistic_change_log = 1;
-
- dict_ret = dict_get_str (this->options, "data-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->data_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option data-change-log %s'. "
- "Defaulting to data-change-log as 'on'.",
- change_log);
- priv->data_change_log = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "metadata-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log,
- &priv->metadata_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-change-log %s'. "
- "Defaulting to metadata-change-log as 'off'.",
- change_log);
- priv->metadata_change_log = 0;
- }
- }
-
- dict_ret = dict_get_str (this->options, "entry-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->entry_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-change-log %s'. "
- "Defaulting to entry-change-log as 'on'.",
- change_log);
- priv->entry_change_log = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "optimistic-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->optimistic_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option optimistic-change-log %s'. "
- "Defaulting to optimistic-change-log as 'on'.",
- change_log);
- priv->optimistic_change_log = 1;
+ GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ read_subvol_index);
+ goto out;
}
+ priv->read_child = read_subvol_index;
}
+ GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
- /* Locking options */
+ GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
- priv->inodelk_trace = 0;
- priv->entrylk_trace = 0;
-
- dict_ret = dict_get_str (this->options, "inodelk-trace",
- &inodelk_trace);
- if (dict_ret == 0) {
- ret = gf_string2boolean (inodelk_trace, &priv->inodelk_trace);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option inodelk-trace %s' ",
- inodelk_trace);
-
- priv->inodelk_trace = 0;
+ priv->favorite_child = -1;
+ GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
+ if (fav_child) {
+ priv->favorite_child = xlator_subvolume_index (this, fav_child);
+ if (priv->favorite_child == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ fav_child->name);
+ goto out;
}
+ gf_log (this->name, GF_LOG_WARNING,
+ favorite_child_warning_str, fav_child->name,
+ fav_child->name, fav_child->name);
}
- dict_ret = dict_get_str (this->options, "entrylk-trace",
- &entrylk_trace);
- if (dict_ret == 0) {
- ret = gf_string2boolean (entrylk_trace, &priv->entrylk_trace);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entrylk-trace %s' ",
- inodelk_trace);
+ GF_OPTION_INIT ("background-self-heal-count",
+ priv->background_self_heal_count, uint32, out);
- priv->entrylk_trace = 0;
- }
- }
+ GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
+ GF_OPTION_INIT ("data-self-heal-algorithm",
+ priv->data_self_heal_algorithm, str, out);
- priv->data_lock_server_count = 1;
- priv->metadata_lock_server_count = 0;
- priv->entry_lock_server_count = 1;
+ GF_OPTION_INIT ("data-self-heal-window-size",
+ priv->data_self_heal_window_size, uint32, out);
- dict_ret = dict_get_int32 (this->options, "data-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting data lock server count to %d.",
- lock_server_count);
+ GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool,
+ out);
- if (lock_server_count == 0)
- gf_log (this->name, GF_LOG_WARNING, "%s",
- no_lock_servers_warning_str);
+ GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
- priv->data_lock_server_count = lock_server_count;
- }
+ GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
+ GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
- dict_ret = dict_get_int32 (this->options,
- "metadata-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting metadata lock server count to %d.",
- lock_server_count);
- priv->metadata_lock_server_count = lock_server_count;
- }
+ GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
+ GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
+ out);
- dict_ret = dict_get_int32 (this->options, "entry-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting entry lock server count to %d.",
- lock_server_count);
+ GF_OPTION_INIT ("entry-change-log", priv->entry_change_log, bool, out);
- priv->entry_lock_server_count = lock_server_count;
- }
+ GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log,
+ bool, out);
- priv->strict_readdir = _gf_false;
-
- dict_ret = dict_get_str (this->options, "strict-readdir",
- &strict_readdir);
- if (dict_ret == 0) {
- ret = gf_string2boolean (strict_readdir, &priv->strict_readdir);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option strict-readdir %s'. "
- "Defaulting to strict-readdir as 'off'.",
- strict_readdir);
- }
- }
+ GF_OPTION_INIT ("inodelk-trace", priv->inodelk_trace, bool, out);
- trav = this->children;
- while (trav) {
- if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Subvolume '%s' specified as read child.",
- trav->xlator->name);
+ GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
- priv->read_child = child_count;
- }
+ GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
- if (fav_ret == 0 && !strcmp (fav_child, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_WARNING,
- favorite_child_warning_str, trav->xlator->name,
- trav->xlator->name, trav->xlator->name);
- priv->favorite_child = child_count;
- }
+ GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT ("quorum-type", qtype, str, out);
+ GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size,
+ out);
+ fix_quorum_options(this,priv,qtype);
- child_count++;
- trav = trav->next;
- }
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("readdir-failover", priv->readdir_failover, bool, out);
+ GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool,
+ out);
priv->wait_count = 1;
- priv->child_count = child_count;
-
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
-
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
gf_afr_mt_char);
if (!priv->child_up) {
@@ -720,8 +380,6 @@ init (xlator_t *this)
AFR_XATTR_PREFIX,
trav->xlator->name);
if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
ret = -ENOMEM;
goto out;
}
@@ -730,6 +388,13 @@ init (xlator_t *this)
i++;
}
+ ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT,
+ this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
gf_afr_mt_int32_t);
if (!priv->last_event) {
@@ -737,13 +402,67 @@ init (xlator_t *this)
goto out;
}
- LOCK_INIT (&priv->root_inode_lk);
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
priv->first_lookup = 1;
priv->root_inode = NULL;
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
+ if (!priv->shd.iamshd) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = -ENOMEM;
+ priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
+ gf_afr_mt_brick_pos_t);
+ if (!priv->shd.pos)
+ goto out;
+
+ priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
+ gf_afr_mt_int32_t);
+ if (!priv->shd.pending)
+ goto out;
+
+ priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
+ child_count, gf_afr_mt_shd_bool_t);
+ if (!priv->shd.inprogress)
+ goto out;
+ priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
+ gf_afr_mt_shd_timer_t);
+ if (!priv->shd.timer)
+ goto out;
+
+ priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.healed)
+ goto out;
+ priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.heal_failed)
+ goto out;
+
+ priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.split_brain)
+ goto out;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+ priv->root_inode = inode_ref (this->itable->root);
+ GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out);
+ GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
+ ret = afr_initialise_statistics (this);
+ if (ret)
+ goto out;
ret = 0;
out:
return ret;
@@ -753,6 +472,13 @@ out:
int
fini (xlator_t *this)
{
+ afr_private_t *priv = NULL; /* translator teardown: release the private state built by init() */
+
+ priv = this->private; /* keep a handle before detaching it from the xlator */
+ this->private = NULL; /* `this` must not keep pointing at state that is about to be destroyed */
+ afr_priv_destroy (priv);
+ /* NOTE(review): this->itable is not released here; no destroy function for it is visible. */
+
return 0;
}
@@ -771,6 +497,9 @@ struct xlator_fops fops = {
.finodelk = afr_finodelk,
.entrylk = afr_entrylk,
.fentrylk = afr_fentrylk,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
/* inode read */
.access = afr_access,
@@ -778,6 +507,7 @@ struct xlator_fops fops = {
.fstat = afr_fstat,
.readlink = afr_readlink,
.getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
.readv = afr_readv,
/* inode write */
@@ -785,9 +515,11 @@ struct xlator_fops fops = {
.truncate = afr_truncate,
.ftruncate = afr_ftruncate,
.setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
.setattr = afr_setattr,
.fsetattr = afr_fsetattr,
.removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
/* dir read */
.opendir = afr_opendir,
@@ -820,68 +552,242 @@ struct xlator_cbks cbks = {
struct volume_options options[] = {
{ .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"
+ },
+ { .key = {"read-subvolume-index" },
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"
+ },
+ { .key = {"read-hash-mode" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 2,
+ .default_value = "0",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option. " /* separator keeps the mode list from fusing with this sentence */
+ "0 = first responder, "
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume), "
+ "2 = hash by GFID of file and client PID",
+ },
+ { .key = {"choose-local" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Choose a local subvolume(i.e. Brick) to read from if "
+ "read-subvolume is not explicitly set.",
},
{ .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "If a split-brain happens choose subvol/brick set by "
+ "this option as source."
},
{ .key = {"background-self-heal-count"},
.type = GF_OPTION_TYPE_INT,
- .min = 0
+ .min = 0,
+ .default_value = "16",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "This specifies the number of self-heals that can be "
+ " performed in background without blocking the fop"
},
{ .key = {"data-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_STR, /* string rather than bool so that "open" is accepted too */
+ .value = {"1", "on", "yes", "true", "enable",
+ "0", "off", "no", "false", "disable",
+ "open"},
+ .default_value = "on",
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."
},
{ .key = {"data-self-heal-algorithm"},
.type = GF_OPTION_TYPE_STR,
- .default_value = "",
.description = "Select between \"full\", \"diff\". The "
"\"full\" algorithm copies the entire file from "
"source to sink. The \"diff\" algorithm copies to "
"sink only those blocks whose checksums don't match "
- "with those of source.",
- .value = { "diff", "full" }
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = { "diff", "full"}
},
{ .key = {"data-self-heal-window-size"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
.max = 1024,
- .default_value = "16",
+ .default_value = "1",
.description = "Maximum number blocks per file for which self-heal "
"process would be applied simultaneously."
},
{ .key = {"metadata-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."
},
{ .key = {"entry-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."
},
{ .key = {"data-change-log"},
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Data fops like write/truncate will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"metadata-change-log"},
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Metadata fops like setattr/setxattr will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"entry-change-log"},
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry fops like create/unlink will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"optimistic-change-log"},
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."
+ },
+ { .key = {"strict-readdir"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"inodelk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"
+ },
+ { .key = {"entrylk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"
+ },
+ { .key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "acquisition fails on any server, then the held locks "
+ "are unlocked and revert to a blocking locked mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempt to acquire lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking lock as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimized\" transaction."
+
},
- { .key = {"data-lock-server-count"},
+ { .key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files " /* trailing space: the next literal is concatenated directly */
+ "will not be performed if this option is turned off."
+ },
+ { .key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."
+ },
+ { .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "auto", "fixed"},
+ .default_value = "none",
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ { .key = {"quorum-count"},
.type = GF_OPTION_TYPE_INT,
- .min = 0
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0, /* NOTE(review): every other entry gives default_value as a string; a bare 0 is a null pointer if this field is a pointer - confirm intent */
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks are present. Other quorum types "
+ "will OVERWRITE this value.",
},
- { .key = {"metadata-lock-server-count"},
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ { .key = {"heal-timeout"},
.type = GF_OPTION_TYPE_INT,
- .min = 0
+ .min = 60,
+ .max = INT_MAX,
+ .default_value = "600",
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"
},
- { .key = {"entry-lock-server-count"},
+ { .key = {"post-op-delay-secs"},
.type = GF_OPTION_TYPE_INT,
- .min = 0
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
},
- { .key = {"strict-readdir"},
+ { .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .default_value = "1KB",
+ },
+ { .key = {"readdir-failover"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "readdir(p) will not failover if this option is off",
+ .default_value = "on",
+ },
+ { .key = {"ensure-durability"},
.type = GF_OPTION_TYPE_BOOL,
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
},
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 2ca13078e..21064db58 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -29,11 +20,18 @@
#include "call-stub.h"
#include "compat-errno.h"
#include "afr-mem-types.h"
+#include "afr-self-heal-algorithm.h"
#include "libxlator.h"
+#include "timer.h"
#define AFR_XATTR_PREFIX "trusted.afr"
#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
+
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
struct _pump_private;
@@ -42,28 +40,80 @@ typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
int32_t op_errno);
typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int child, int32_t op_error,
- int32_t op_errno);
+ int32_t op_error, int32_t op_errno);
typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
+typedef void (*afr_lookup_done_cbk_t) (call_frame_t *frame, xlator_t *this, /* callback type of the lookup_done member of struct afr_self_heal_ */
+ int32_t op_ret, int32_t op_errno); /* returns nothing, unlike the int-returning callback typedefs nearby */
+
+typedef enum { /* element type of afr_self_heald_t.pos */
+ AFR_POS_UNKNOWN, /* first enumerator, so its value is 0 */
+ AFR_POS_LOCAL,
+ AFR_POS_REMOTE
+} afr_child_pos_t; /* NOTE(review): presumably whether a child brick is local or remote to this node - confirm */
+
+typedef enum { /* subvolume status codes; numbering starts at 1, so 0 is not a named status */
+ SPLIT_BRAIN = 1,
+ ALL_FOOLS = 2
+} afr_subvol_status_t;
+
+typedef enum { /* operation selector stored in afr_inode_params_t.op */
+ AFR_INODE_SET_READ_CTX = 1, /* explicit 1: a zeroed op field names no operation */
+ AFR_INODE_RM_STALE_CHILDREN,
+ AFR_INODE_SET_OPENDIR_DONE,
+ AFR_INODE_GET_READ_CTX,
+ AFR_INODE_GET_OPENDIR_DONE,
+} afr_inode_op_t;
typedef struct afr_inode_params_ {
- uint64_t mask_type;
+ afr_inode_op_t op; /* requested operation; takes the place of the old uint64_t mask_type field */
union {
gf_boolean_t value;
struct {
int32_t read_child;
- int32_t *fresh_children;
+ int32_t *children; /* renamed from fresh_children; NOTE(review): presumably no longer only "fresh" ones - confirm */
} read_ctx;
} u;
} afr_inode_params_t;
} afr_inode_params_t;
+typedef enum afr_spb_state { /* tri-state type of afr_inode_ctx_t.mdata_spb and .data_spb */
+ DONT_KNOW, /* first enumerator, so its value is 0 */
+ SPB, /* NOTE(review): presumably short for "split-brain" - confirm */
+ NO_SPB
+} afr_spb_state_t;
+
typedef struct afr_inode_ctx_ {
uint64_t masks;
int32_t *fresh_children;//increasing order of latency
+ afr_spb_state_t mdata_spb; /* NOTE(review): presumably the metadata split-brain state - confirm */
+ afr_spb_state_t data_spb; /* NOTE(review): presumably the data split-brain state - confirm */
+ uint32_t open_fd_count; /* NOTE(review): presumably the number of fds currently open on the inode - confirm */
} afr_inode_ctx_t;
+typedef enum { /* element type of afr_self_heald_t.pending */
+ NONE, /* first enumerator, so its value is 0 */
+ INDEX,
+ INDEX_TO_BE_HEALED,
+ FULL,
+} afr_crawl_type_t;
+
+typedef struct afr_self_heald_ { /* self-heal-daemon state; embedded in afr_private_t as the `shd` member */
+ gf_boolean_t enabled; /* set from the "self-heal-daemon" option */
+ gf_boolean_t iamshd; /* set from "iam-self-heal-daemon"; init() returns before allocating the members below when this is false */
+ afr_crawl_type_t *pending; /* array allocated in init() with child_count entries */
+ gf_boolean_t *inprogress; /* array allocated in init() with child_count entries */
+ afr_child_pos_t *pos; /* array allocated in init() with child_count entries */
+ gf_timer_t **timer; /* array allocated in init() with child_count entries */
+ eh_t *healed; /* created in init() via eh_new (AFR_EH_HEALED_LIMIT, ...) */
+ eh_t *heal_failed; /* created in init() via eh_new (AFR_EH_HEAL_FAIL_LIMIT, ...) */
+ eh_t *split_brain; /* created in init() via eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, ...) */
+ eh_t **statistics; /* NOTE(review): presumably set up by afr_initialise_statistics() - confirm */
+ void **crawl_events; /* NOTE(review): allocation is not visible in init() - confirm owner */
+ char *node_uuid; /* set from the "node-uuid" option */
+ int timeout; /* set from the "heal-timeout" option */
+} afr_self_heald_t;
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -73,7 +123,6 @@ typedef struct _afr_private {
xlator_t **children;
- gf_lock_t root_inode_lk;
int first_lookup;
inode_t *root_inode;
@@ -81,7 +130,7 @@ typedef struct _afr_private {
char **pending_key;
- gf_boolean_t data_self_heal; /* on/off */
+ char *data_self_heal; /* on/off/open */
char * data_self_heal_algorithm; /* name of algorithm */
unsigned int data_self_heal_window_size; /* max number of pipelined
read/writes */
@@ -96,13 +145,10 @@ typedef struct _afr_private {
gf_boolean_t entry_change_log; /* on/off */
int read_child; /* read-subvolume */
+ unsigned int hash_mode; /* for when read_child is not set */
int favorite_child; /* subvolume to be preferred in resolving
split-brain cases */
- unsigned int data_lock_server_count;
- unsigned int metadata_lock_server_count;
- unsigned int entry_lock_server_count;
-
gf_boolean_t inodelk_trace;
gf_boolean_t entrylk_trace;
@@ -118,36 +164,66 @@ typedef struct _afr_private {
pthread_mutex_t mutex;
struct list_head saved_fds; /* list of fds on which locks have succeeded */
- gf_boolean_t optimistic_change_log;
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
+ afr_self_heald_t shd;
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ gf_boolean_t readdir_failover;
+ uint64_t sh_readdir_size;
+ gf_boolean_t ensure_durability;
+ char *sh_domain;
} afr_private_t;
+typedef enum { /* progress state; type of every member of afr_sh_status_for_all_type */
+ AFR_SELF_HEAL_NOT_ATTEMPTED, /* first enumerator, so its value is 0 */
+ AFR_SELF_HEAL_STARTED,
+ AFR_SELF_HEAL_FAILED,
+ AFR_SELF_HEAL_SYNC_BEGIN,
+} afr_self_heal_status;
+
typedef struct {
+ afr_self_heal_status gfid_or_missing_entry_self_heal; /* one status slot per self-heal type */
+ afr_self_heal_status metadata_self_heal;
+ afr_self_heal_status data_self_heal;
+ afr_self_heal_status entry_self_heal;
+} afr_sh_status_for_all_type; /* held in struct afr_self_heal_ as the afr_all_sh_status member */
+
+typedef enum { /* self-heal kinds; type of the sh_type_in_action member of struct afr_self_heal_ */
+ AFR_SELF_HEAL_ENTRY, /* value 0; the next three enumerators are 1, 2 and 3 */
+ AFR_SELF_HEAL_METADATA,
+ AFR_SELF_HEAL_DATA,
+ AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY,
+ AFR_SELF_HEAL_INVALID = -1, /* explicit value outside the 0..3 range of the real kinds */
+} afr_self_heal_type;
+
+typedef enum { /* NOTE(review): presumably selects whether a failure check covers every self-heal type or a single one - confirm */
+ AFR_CHECK_ALL,
+ AFR_CHECK_SPECIFIC,
+} afr_sh_fail_check_type;
+
+struct afr_self_heal_ {
/* External interface: These are variables (some optional) that
are set by whoever has triggered self-heal */
- gf_boolean_t need_data_self_heal;
- gf_boolean_t need_metadata_self_heal;
- gf_boolean_t need_entry_self_heal;
- gf_boolean_t need_gfid_self_heal;
- gf_boolean_t need_missing_entry_self_heal;
+ gf_boolean_t do_data_self_heal;
+ gf_boolean_t do_metadata_self_heal;
+ gf_boolean_t do_entry_self_heal;
+ gf_boolean_t do_gfid_self_heal;
+ gf_boolean_t do_missing_entry_self_heal;
+ gf_boolean_t force_confirm_spb; /* Check for split-brains even when
+ self-heal is turned off */
gf_boolean_t forced_merge; /* Is this a self-heal triggered to
forcibly merge the directories? */
- gf_boolean_t healing_fd_opened; /* true if caller has already
- opened fd */
-
- gf_boolean_t data_lock_held; /* true if caller has already
- acquired 0-0 lock */
-
- fd_t *healing_fd; /* set if callers has opened fd */
-
gf_boolean_t background; /* do self-heal in background
if possible */
-
ia_type_t type; /* st_mode of the entry we're doing
self-heal on */
inode_t *inode; /* inode on which the self-heal is
@@ -159,7 +235,7 @@ typedef struct {
background, this function will be called as soon as possible. */
int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, int32_t sh_failed);
/* End of external interface members */
@@ -172,7 +248,6 @@ typedef struct {
afr_expunge_done_cbk_t expunge_done;
afr_impunge_done_cbk_t impunge_done;
- int32_t impunge_ret_child;
/* array of xattr's, one for each child */
dict_t **xattr;
@@ -187,6 +262,10 @@ typedef struct {
int32_t *fresh_parent_dirs;
/* array of errno's, one for each child */
int *child_errno;
+ /*loc used for lookup*/
+ loc_t lookup_loc;
+ int32_t lookup_flags;
+ afr_lookup_done_cbk_t lookup_done;
int32_t **pending_matrix;
int32_t **delta_matrix;
@@ -198,39 +277,53 @@ typedef struct {
int source;
int active_source;
int active_sinks;
- int *success;
+ unsigned char *success;
unsigned char *locked_nodes;
int lock_count;
- mode_t impunging_entry_mode;
const char *linkname;
-
- int op_failed;
-
+ gf_boolean_t entries_skipped;
+
+ gf_boolean_t actual_sh_started;
+ gf_boolean_t sync_done;
+ gf_boolean_t data_lock_held;
+ gf_boolean_t sh_dom_lock_held;
+ gf_boolean_t eof_reached;
+ fd_t *healing_fd;
int file_has_holes;
blksize_t block_size;
off_t file_size;
off_t offset;
+ unsigned char *write_needed;
+ uint8_t *checksum;
afr_post_remove_call_t post_remove_call;
- loc_t parent_loc;
+ char *data_sh_info;
+ char *metadata_sh_info;
+ loc_t parent_loc;
call_frame_t *orig_frame;
+ call_frame_t *old_loop_frame;
gf_boolean_t unwound;
- /* private data for the particular self-heal algorithm */
- void *private;
-
- int (*flush_self_heal_cbk) (call_frame_t *frame, xlator_t *this);
+ afr_sh_algo_private_t *private;
+ afr_sh_status_for_all_type afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action;
+ struct afr_sh_algorithm *algo;
+ afr_lock_cbk_t data_lock_success_handler;
+ afr_lock_cbk_t data_lock_failure_handler;
+ gf_boolean_t data_lock_block;
int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
call_frame_t *sh_frame;
-} afr_self_heal_t;
+};
+typedef struct afr_self_heal_ afr_self_heal_t;
typedef enum {
AFR_DATA_TRANSACTION, /* truncate, write, ... */
@@ -292,11 +385,31 @@ afr_index_for_transaction_type (afr_transaction_type type)
return -1; /* make gcc happy */
}
+typedef struct {
+ loc_t loc;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
+
+} afr_entry_lockee_t;
+
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2);
+
+typedef struct {
+ char *domain; /* Domain on which inodelk is taken */
+ struct gf_flock flock;
+ unsigned char *locked_nodes;
+ int32_t lock_count;
+} afr_inodelk_t;
typedef struct {
loc_t *lk_loc;
- struct gf_flock lk_flock;
+ int lockee_count;
+ afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+
+ afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
@@ -305,24 +418,22 @@ typedef struct {
unsigned char *locked_nodes;
unsigned char *lower_locked_nodes;
- unsigned char *inode_locked_nodes;
- unsigned char *entry_locked_nodes;
selfheal_lk_type_t selfheal_lk_type;
transaction_lk_type_t transaction_lk_type;
int32_t lock_count;
- int32_t inodelk_lock_count;
int32_t entrylk_lock_count;
uint64_t lock_number;
int32_t lk_call_count;
+ int32_t lk_expected_count;
+ int32_t lk_attempted_count;
int32_t lock_op_ret;
int32_t lock_op_errno;
-
- int (*lock_cbk) (call_frame_t *, xlator_t *);
-
+ afr_lock_cbk_t lock_cbk;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
} afr_internal_lock_t;
typedef struct _afr_locked_fd {
@@ -330,21 +441,29 @@ typedef struct _afr_locked_fd {
struct list_head list;
} afr_locked_fd_t;
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ int32_t op_errno;
+};
+
typedef struct _afr_local {
int uid;
int gid;
unsigned int call_count;
unsigned int success_count;
unsigned int enoent_count;
+ uint32_t open_fd_count;
+ gf_boolean_t update_open_fd_count;
- unsigned int govinda_gOvinda;
+ unsigned int unhealable;
unsigned int read_child_index;
unsigned char read_child_returned;
unsigned int first_up_child;
- pid_t saved_pid;
+ gf_lkowner_t saved_lk_owner;
int32_t op_ret;
int32_t op_errno;
@@ -376,12 +495,25 @@ typedef struct _afr_local {
dict_t *dict;
int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
+
- int (*openfd_flush_cbk) (call_frame_t *frame, xlator_t *this);
+ /* Is the current writev() going to perform a stable write?
+ i.e., does fd->flags or the @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
- /*
- This struct contains the arguments for the "continuation"
- (scheme-like) of fops
+ /* This write appended to the file. Not necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
+
+ int allow_sh_for_running_transaction;
+
+
+ /* This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
*/
int op;
@@ -392,13 +524,11 @@ typedef struct _afr_local {
} statfs;
struct {
+ uint32_t parent_entrylk;
uuid_t gfid_req;
inode_t *inode;
struct iatt buf;
struct iatt postparent;
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
dict_t **xattrs;
dict_t *xattr;
struct iatt *postparents;
@@ -406,11 +536,13 @@ typedef struct _afr_local {
int32_t read_child;
int32_t *sources;
int32_t *success_children;
+ int32_t **pending_matrix;
+ gf_boolean_t fresh_lookup;
+ gf_boolean_t possible_spb;
} lookup;
struct {
int32_t flags;
- int32_t wbflags;
} open;
struct {
@@ -429,31 +561,28 @@ typedef struct _afr_local {
struct {
int last_index;
- ino_t ino;
} stat;
struct {
int last_index;
- ino_t ino;
} fstat;
struct {
size_t size;
int last_index;
- ino_t ino;
} readlink;
struct {
char *name;
int last_index;
- long pathinfo_len;
+ long xattr_len;
} getxattr;
struct {
- ino_t ino;
size_t size;
off_t offset;
int last_index;
+ uint32_t flags;
} readv;
/* dir read */
@@ -471,59 +600,43 @@ typedef struct _afr_local {
int32_t op_errno;
size_t size;
off_t offset;
-
+ dict_t *dict;
gf_boolean_t failed;
int last_index;
} readdir;
/* inode write */
struct {
- ino_t ino;
struct iatt prebuf;
struct iatt postbuf;
+ } inode_wfop; //common structure for all inode-write-fops
+ struct {
int32_t op_ret;
struct iovec *vector;
struct iobref *iobref;
int32_t count;
off_t offset;
+ uint32_t flags;
} writev;
struct {
- ino_t ino;
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync;
-
- struct {
- ino_t ino;
off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
} truncate;
struct {
- ino_t ino;
off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
} ftruncate;
struct {
- ino_t ino;
struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
} setattr;
struct {
- ino_t ino;
struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
} fsetattr;
struct {
@@ -532,116 +645,87 @@ typedef struct _afr_local {
} setxattr;
struct {
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
+
+ struct {
char *name;
} removexattr;
+ struct {
+ dict_t *xattr;
+ } xattrop;
+
+ struct {
+ dict_t *xattr;
+ } fxattrop;
+
/* dir write */
struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- fd_t *fd;
- dict_t *params;
- int32_t flags;
- mode_t mode;
inode_t *inode;
struct iatt buf;
struct iatt preparent;
struct iatt postparent;
- struct iatt read_child_buf;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; //common structure for all dir fops
+
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
} create;
struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
dev_t dev;
mode_t mode;
dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
} mknod;
struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
int32_t mode;
dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
} mkdir;
struct {
- ino_t parent_ino;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink;
-
- struct {
- int flags;
- ino_t parent_ino;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
+ int flags;
} rmdir;
struct {
- ino_t oldparent_ino;
- ino_t newparent_ino;
- ino_t ino;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preoldparent;
- struct iatt prenewparent;
- struct iatt postoldparent;
- struct iatt postnewparent;
- } rename;
-
- struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } link;
-
- struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- inode_t *inode;
dict_t *params;
- struct iatt buf;
- struct iatt read_child_buf;
char *linkpath;
- struct iatt preparent;
- struct iatt postparent;
} symlink;
+ struct {
+ int32_t mode;
+ off_t offset;
+ size_t len;
+ } fallocate;
+
+ struct {
+ off_t offset;
+ size_t len;
+ } discard;
+
struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
+ off_t offset;
+ size_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
+
+
} cont;
struct {
off_t start, len;
+ gf_boolean_t eager_lock_on;
+ int *eager_lock;
+
char *basename;
char *new_basename;
@@ -650,12 +734,19 @@ typedef struct _afr_local {
afr_transaction_type type;
- int success_count;
- int erase_pending;
- int failure_count;
+ /* pre-compute the post piggyback status before
+ entering POST-OP phase
+ */
+ int *postop_piggybacked;
+
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
- int last_tried;
- int32_t *child_errno;
+ struct list_head eager_locked;
+
+ int32_t **txn_changelog;//changelog after pre+post ops
+ unsigned char *pre_op;
call_frame_t *main_frame;
@@ -673,16 +764,32 @@ typedef struct _afr_local {
afr_self_heal_t self_heal;
struct marker_str marker;
+
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
+
+ mode_t umask;
+ int xflag;
+ gf_boolean_t do_discovery;
+ struct afr_reply *replies;
} afr_local_t;
+typedef enum {
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
typedef struct {
unsigned int *pre_op_done;
- unsigned int *opened_on; /* which subvolumes the fd is open on */
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
unsigned int *pre_op_piggyback;
+ unsigned int *lock_piggyback;
+ unsigned int *lock_acquired;
+
int flags;
- int32_t wbflags;
uint64_t up_count; /* number of CHILD_UPs this fd has seen */
uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
@@ -693,19 +800,32 @@ typedef struct {
struct list_head entries; /* needed for readdir failover */
unsigned char *locked_on; /* which subvolumes locks have been successful */
+
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
+ int call_child;
+
+ /* set if any write on this fd was a non-stable write
+ (i.e., without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+
+ /* list of frames currently in progress */
+ struct list_head eager_locked;
} afr_fd_ctx_t;
/* try alloc and if it fails, goto label */
-#define ALLOC_OR_GOTO(var, type, label) do { \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_afr_mt_##type); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
+#define AFR_LOCAL_ALLOC_OR_GOTO(var, label) do { \
+ var = mem_get0 (THIS->local_pool); \
+ if (!var) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "out of memory :("); \
+ op_errno = ENOMEM; \
+ goto label; \
+ } \
} while (0);
@@ -726,8 +846,14 @@ int
pump_command_reply (call_frame_t *frame, xlator_t *this);
int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, ...);
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
+
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count);
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock);
int
afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
@@ -740,7 +866,7 @@ afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
unsigned char *locked_nodes);
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this);
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
int
afr_set_lock_number (call_frame_t *frame, xlator_t *this);
@@ -764,10 +890,16 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this);
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count);
int pump_start (call_frame_t *frame, xlator_t *this);
int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+
+int
afr_fd_ctx_set (xlator_t *this, fd_t *fd);
int32_t
@@ -777,11 +909,18 @@ void
afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
int32_t *fresh_children);
-void
-afr_build_parent_loc (loc_t *parent, loc_t *child);
-
int
-afr_up_children_count (int child_count, unsigned char *child_up);
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
+
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count);
+
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count);
+
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count);
gf_boolean_t
afr_is_fresh_lookup (loc_t *loc, xlator_t *this);
@@ -798,15 +937,16 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this);
int
afr_frame_return (call_frame_t *frame);
-uint64_t
+gf_boolean_t
afr_is_split_brain (xlator_t *this, inode_t *inode);
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set);
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb);
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags);
+ fd_t *fd, dict_t *xdata);
void
afr_set_opendir_done (xlator_t *this, inode_t *inode);
@@ -821,7 +961,7 @@ int
afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
int
-afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd);
+afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
#define AFR_STACK_UNWIND(fop, frame, params ...) \
do { \
@@ -833,22 +973,27 @@ afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd);
frame->local = NULL; \
} \
STACK_UNWIND_STRICT (fop, frame, params); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
- } while (0);
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0)
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
} while (0);
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
/* allocate and return a string that is the basename of argument */
static inline char *
AFR_BASENAME (const char *str)
@@ -862,17 +1007,17 @@ AFR_BASENAME (const char *str)
}
int
-afr_transaction_local_init (afr_local_t *local, afr_private_t *priv);
+afr_transaction_local_init (afr_local_t *local, xlator_t *this);
int32_t
afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
int32_t *
-afr_fresh_children_create (int32_t child_count);
+afr_children_create (int32_t child_count);
int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
int
afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
@@ -884,12 +1029,13 @@ afr_first_up_child (unsigned char *child_up, size_t child_count);
int
afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources);
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid);
void
afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child);
+ int32_t config_read_child, uuid_t gfid);
int32_t
afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
@@ -902,19 +1048,23 @@ afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
int32_t read_child);
void
afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *fresh_children, unsigned int child_count);
+ int32_t *children, unsigned int child_count);
void
-afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
+afr_children_add_child (int32_t *children, int32_t child,
int32_t child_count);
void
-afr_reset_children (int32_t *fresh_children, int32_t child_count);
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno);
+afr_children_rm_child (int32_t *children, int32_t child,
+ int32_t child_count);
+void
+afr_reset_children (int32_t *children, int32_t child_count);
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio);
int
afr_errno_count (int32_t *children, int *child_errno,
unsigned int child_count, int32_t op_errno);
int
-afr_get_children_count (int32_t *fresh_children, unsigned int child_count);
+afr_get_children_count (int32_t *children, unsigned int child_count);
gf_boolean_t
afr_is_child_present (int32_t *success_children, int32_t child_count,
int32_t child);
@@ -928,7 +1078,7 @@ gf_boolean_t
afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
unsigned int child_count, const char *path,
const char *xlator_name);
-int
+unsigned int
afr_gfid_missing_count (const char *xlator_name, int32_t *children,
struct iatt *bufs, unsigned int child_count,
const char *path);
@@ -941,4 +1091,122 @@ afr_transaction_type_get (ia_type_t ia_type);
int32_t
afr_resultant_errno_get (int32_t *children,
int *child_errno, unsigned int child_count);
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
+ int32_t *stale_children);
+void
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t background, ia_type_t ia_type, char *reason,
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed));
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open);
+
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this);
+int
+afr_set_elem_count_get (unsigned char *elems, int child_count);
+
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+
+gf_boolean_t
+afr_open_only_data_self_heal (char *data_self_heal);
+
+gf_boolean_t
+afr_data_self_heal_enabled (char *data_self_heal);
+
+void
+afr_set_low_priority (call_frame_t *frame);
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+ int flags);
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv);
+
+void
+afr_matrix_cleanup (int32_t **pending, unsigned int m);
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n);
+
+gf_boolean_t
+afr_is_errno_set (int *child_errno, int child);
+
+gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child);
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd);
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count);
+void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count);
+/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
+ */
+#define AFR_QUORUM_AUTO INT_MAX
+
+/*
+ * Having this as a macro will make debugging a bit weirder, but does reduce
+ * the probability of functions handling this check inconsistently.
+ */
+#define QUORUM_CHECK(_func,_label) do { \
+ if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \
+ gf_log(this->name,GF_LOG_WARNING, \
+ "failing "#_func" due to lack of quorum"); \
+ op_errno = EROFS; \
+ goto _label; \
+ } \
+} while (0);
+
+
+#define AFR_SBRAIN_MSG "Failed on %s as split-brain is seen. Returning EIO."
+
+#define AFR_SBRAIN_CHECK_FD(fd, label) do { \
+ if (fd->inode && afr_is_split_brain (this, fd->inode)) { \
+ op_errno = EIO; \
+ gf_log (this->name, GF_LOG_WARNING, \
+ AFR_SBRAIN_MSG ,uuid_utoa (fd->inode->gfid)); \
+ goto label; \
+ } \
+} while (0)
+
+#define AFR_SBRAIN_CHECK_LOC(loc, label) do { \
+ if (loc->inode && afr_is_split_brain (this, loc->inode)) { \
+ op_errno = EIO; \
+ loc_path (loc, NULL); \
+ gf_log (this->name, GF_LOG_WARNING, \
+ AFR_SBRAIN_MSG , loc->path); \
+ goto label; \
+ } \
+} while (0)
+
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
+
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this);
+
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 769e03b14..a7f72fb30 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -1,25 +1,17 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
+#include <fnmatch.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -28,8 +20,16 @@
#include "afr-common.c"
#include "defaults.c"
+#include "glusterfs.h"
static uint64_t pump_pid = 0;
+static inline void
+pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+{
+ afr_update_loc_gfids (loc, iatt, parent);
+ uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+}
+
static int
pump_mark_start_pending (xlator_t *this)
{
@@ -140,85 +140,17 @@ pump_set_resume_path (xlator_t *this, const char *path)
LOCK (&pump_priv->resume_path_lock);
{
- pump_priv->resume_path = strdup (path);
- if (!pump_priv->resume_path)
- ret = -1;
+ strncpy (pump_priv->resume_path, path, strlen (path) + 1);
}
UNLOCK (&pump_priv->resume_path_lock);
return ret;
}
-static void
-build_child_loc (loc_t *parent, loc_t *child, char *path, char *name)
-{
- child->path = path;
- child->name = name;
-
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
-}
-
-static char *
-build_file_path (loc_t *loc, gf_dirent_t *entry)
-{
- xlator_t *this = NULL;
- char *file_path = NULL;
- int pathlen = 0;
- int total_size = 0;
-
- this = THIS;
-
- pathlen = STRLEN_0 (loc->path);
-
- if (IS_ROOT_PATH (loc->path)) {
- total_size = pathlen + entry->d_len;
- file_path = GF_CALLOC (1, total_size, gf_afr_mt_char);
- if (!file_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return NULL;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "constructing file path of size=%d"
- "pathlen=%d, d_len=%d",
- total_size, pathlen,
- entry->d_len);
-
- snprintf(file_path, total_size, "%s%s", loc->path, entry->d_name);
-
- } else {
- total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */
- file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char);
- if (!file_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return NULL;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "constructing file path of size=%d"
- "pathlen=%d, d_len=%d",
- total_size, pathlen,
- entry->d_len);
-
- snprintf(file_path, total_size, "%s/%s", loc->path, entry->d_name);
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "path=%s and d_name=%s", loc->path, entry->d_name);
- gf_log (this->name, GF_LOG_TRACE,
- "constructed file_path=%s of size=%d", file_path, total_size);
-
- return file_path;
-}
-
static int
pump_save_path (xlator_t *this, const char *path)
{
afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
pump_state_t state;
dict_t *dict = NULL;
loc_t loc = {0};
@@ -230,29 +162,30 @@ pump_save_path (xlator_t *this, const char *path)
return 0;
priv = this->private;
- pump_priv = priv->pump_private;
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s", path, PUMP_PATH);
ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"setxattr failed - could not save path=%s", path);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"setxattr succeeded - saved path=%s", path);
- gf_log (this->name, GF_LOG_DEBUG,
- "Saving path for status info");
}
dict_unref (dict);
+ loc_wipe (&loc);
return 0;
}
@@ -315,15 +248,9 @@ pump_get_resume_path (xlator_t *this)
static int
pump_update_resume_state (xlator_t *this, const char *path)
{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
pump_state_t state;
const char *resume_path = NULL;
- priv = this->private;
- pump_priv = priv->pump_private;
-
state = pump_get_state ();
if (state == PUMP_STATE_RESUME) {
@@ -351,16 +278,10 @@ pump_update_resume_state (xlator_t *this, const char *path)
static gf_boolean_t
is_pump_traversal_allowed (xlator_t *this, const char *path)
{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
pump_state_t state;
const char *resume_path = NULL;
gf_boolean_t ret = _gf_true;
- priv = this->private;
- pump_priv = priv->pump_private;
-
state = pump_get_state ();
if (state == PUMP_STATE_RESUME) {
@@ -403,26 +324,22 @@ pump_save_file_stats (xlator_t *this, const char *path)
static int
gf_pump_traverse_directory (loc_t *loc)
{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
-
- off_t offset = 0;
- loc_t entry_loc;
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t entries;
-
- struct iatt iatt, parent;
- dict_t *xattr_rsp;
-
- char *file_path = NULL;
- int ret = 0;
- gf_boolean_t is_directory_empty = _gf_true;
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ loc_t entry_loc = {0};
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t entries;
+ struct iatt iatt = {0};
+ struct iatt parent = {0};
+ dict_t *xattr_rsp = NULL;
+ int ret = 0;
+ gf_boolean_t is_directory_empty = _gf_true;
+ gf_boolean_t free_entries = _gf_false;
INIT_LIST_HEAD (&entries.list);
this = THIS;
- priv = this->private;
GF_ASSERT (loc->inode);
@@ -444,7 +361,8 @@ gf_pump_traverse_directory (loc_t *loc)
"pump opendir on %s returned=%d",
loc->path, ret);
- while (syncop_readdirp (this, fd, 131072, offset, &entries)) {
+ while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
+ free_entries = _gf_true;
if (list_empty (&entries.list)) {
gf_log (this->name, GF_LOG_TRACE,
@@ -456,24 +374,23 @@ gf_pump_traverse_directory (loc_t *loc)
gf_log (this->name, GF_LOG_DEBUG,
"found readdir entry=%s", entry->d_name);
- file_path = build_file_path (loc, entry);
- if (!file_path) {
- gf_log (this->name, GF_LOG_DEBUG,
- "file path construction failed");
- goto out;
+ offset = entry->d_off;
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
+ "gfid present skipping",
+ loc->path, entry->d_name);
+ continue;
}
-
- build_child_loc (loc, &entry_loc, file_path, entry->d_name);
+ loc_wipe (&entry_loc);
+ ret = afr_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret)
+ goto out;
if (!IS_ENTRY_CWD (entry->d_name) &&
- !IS_ENTRY_PARENT (entry->d_name)) {
+ !IS_ENTRY_PARENT (entry->d_name)) {
is_directory_empty = _gf_false;
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, &xattr_rsp, &parent);
-
- memcpy (entry_loc.inode->gfid, iatt.ia_gfid, 16);
-
gf_log (this->name, GF_LOG_DEBUG,
"lookup %s => %"PRId64,
entry_loc.path,
@@ -482,12 +399,14 @@ gf_pump_traverse_directory (loc_t *loc)
ret = syncop_lookup (this, &entry_loc, NULL,
&iatt, &xattr_rsp, &parent);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "second lookup ret=%d: %s => %"PRId64,
- ret,
- entry_loc.path,
- iatt.ia_ino);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: lookup failed",
+ entry_loc.path);
+ continue;
+ }
+ pump_fill_loc_info (&entry_loc, &iatt,
+ &parent);
pump_update_resume_state (this, entry_loc.path);
@@ -501,10 +420,6 @@ gf_pump_traverse_directory (loc_t *loc)
goto out;
}
- gf_log (this->name, GF_LOG_TRACE,
- "type of file=%d, IFDIR=%d",
- iatt.ia_type, IA_IFDIR);
-
if (IA_ISDIR (iatt.ia_type)) {
if (is_pump_traversal_allowed (this, entry_loc.path)) {
gf_log (this->name, GF_LOG_TRACE,
@@ -513,18 +428,21 @@ gf_pump_traverse_directory (loc_t *loc)
gf_pump_traverse_directory (&entry_loc);
}
}
- }
- offset = entry->d_off;
- loc_wipe (&entry_loc);
+ }
}
gf_dirent_free (&entries);
+ free_entries = _gf_false;
gf_log (this->name, GF_LOG_TRACE,
"offset incremented to %d",
(int32_t ) offset);
}
+ ret = syncop_close (fd);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
+
if (is_directory_empty && IS_ROOT_PATH (loc->path)) {
pump_change_state (this, PUMP_STATE_RUNNING);
gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
@@ -532,34 +450,18 @@ gf_pump_traverse_directory (loc_t *loc)
}
out:
+ if (entry_loc.path)
+ loc_wipe (&entry_loc);
+ if (free_entries)
+ gf_dirent_free (&entries);
return 0;
-
-}
-
-void
-build_root_loc (inode_t *inode, loc_t *loc)
-{
- loc->path = "/";
- loc->name = "";
- loc->inode = inode;
- loc->ino = 1;
- loc->inode->ino = 1;
- memset (loc->inode->gfid, 0, 16);
- loc->inode->gfid[15] = 1;
-
}
static int
pump_update_resume_path (xlator_t *this)
{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
const char *resume_path = NULL;
- priv = this->private;
- pump_priv = priv->pump_private;
-
resume_path = pump_get_resume_path (this);
if (resume_path) {
@@ -580,10 +482,9 @@ pump_update_resume_path (xlator_t *this)
static int32_t
pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
loc_t loc = {0};
int i = 0;
int ret = 0;
@@ -591,9 +492,8 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
int sink = 1;
priv = this->private;
- pump_priv = priv->pump_private;
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
ret = syncop_removexattr (priv->children[source], &loc,
PUMP_PATH);
@@ -609,6 +509,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
"failed with %s", strerror (errno));
}
+ loc_wipe (&loc);
return pump_command_reply (frame, this);
}
@@ -628,7 +529,7 @@ pump_complete_migration (xlator_t *this)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
@@ -640,6 +541,10 @@ pump_complete_migration (xlator_t *this)
pump_priv->pump_finished = _gf_true;
dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon");
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s",
+ loc.path, PUMP_SOURCE_COMPLETE);
ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
if (ret < 0) {
@@ -647,6 +552,10 @@ pump_complete_migration (xlator_t *this)
"setxattr failed - while notifying source complete");
}
dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon");
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s",
+ loc.path, PUMP_SINK_COMPLETE);
ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0);
if (ret < 0) {
@@ -662,24 +571,11 @@ pump_complete_migration (xlator_t *this)
call_resume (pump_priv->cleaner);
}
+ loc_wipe (&loc);
return 0;
}
static int
-pump_set_root_gfid (dict_t *dict)
-{
- uuid_t gfid;
- int ret = 0;
-
- memset (gfid, 0, 16);
- gfid[15] = 1;
-
- ret = afr_set_dict_gfid (dict, gfid);
-
- return ret;
-}
-
-static int
pump_lookup_sink (loc_t *loc)
{
xlator_t *this = NULL;
@@ -692,7 +588,7 @@ pump_lookup_sink (loc_t *loc)
xattr_req = dict_new ();
- ret = pump_set_root_gfid (xattr_req);
+ ret = afr_set_root_gfid (xattr_req);
if (ret)
goto out;
@@ -731,7 +627,7 @@ pump_task (void *data)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
xattr_req = dict_new ();
if (!xattr_req) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -740,14 +636,13 @@ pump_task (void *data)
goto out;
}
- pump_set_root_gfid (xattr_req);
+ afr_set_root_gfid (xattr_req);
ret = syncop_lookup (this, &loc, xattr_req,
&iatt, &xattr_rsp, &parent);
gf_log (this->name, GF_LOG_TRACE,
- "lookup: ino=%"PRId64", path=%s",
- loc.ino,
- loc.path);
+ "lookup: path=%s gfid=%s",
+ loc.path, uuid_utoa (loc.inode->gfid));
ret = pump_check_and_update_status (this);
if (ret < 0) {
@@ -756,7 +651,7 @@ pump_task (void *data)
pump_update_resume_path (this);
- pump_set_root_gfid (xattr_req);
+ afr_set_root_gfid (xattr_req);
ret = pump_lookup_sink (&loc);
if (ret) {
pump_update_resume_path (this);
@@ -770,6 +665,7 @@ out:
if (xattr_req)
dict_unref (xattr_req);
+ loc_wipe (&loc);
return 0;
}
@@ -779,12 +675,10 @@ pump_task_completion (int ret, call_frame_t *sync_frame, void *data)
{
xlator_t *this = NULL;
afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
this = THIS;
priv = this->private;
- pump_priv = priv->pump_private;
inode_unref (priv->root_inode);
STACK_DESTROY (sync_frame->root);
@@ -805,7 +699,7 @@ pump_start (call_frame_t *pump_frame, xlator_t *this)
priv = this->private;
pump_priv = priv->pump_private;
- pump_frame->root->lk_owner = (uint64_t) (unsigned long)pump_frame->root;
+ afr_set_lk_owner (pump_frame, this, pump_frame->root);
pump_pid = (uint64_t) (unsigned long)pump_frame->root;
ret = synctask_new (pump_priv->env, pump_task,
@@ -819,8 +713,8 @@ pump_start (call_frame_t *pump_frame, xlator_t *this)
}
gf_log (this->name, GF_LOG_DEBUG,
- "setting pump as started lk_owner: %"PRIu64" %"PRIu64,
- pump_frame->root->lk_owner, pump_pid);
+ "setting pump as started lk_owner: %s %"PRIu64,
+ lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
priv->use_afr_in_pump = 1;
out:
@@ -854,7 +748,7 @@ pump_cmd_start_setxattr_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
call_frame_t *prev = NULL;
@@ -906,9 +800,9 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
- data = data_ref (dict_get (local->dict, PUMP_CMD_START));
+ data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
if (!data) {
ret = -1;
gf_log (this->name, GF_LOG_ERROR,
@@ -947,7 +841,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
PUMP_SINK_CHILD(this)->fops->setxattr,
&loc,
dict,
- 0);
+ 0, NULL);
ret = 0;
@@ -961,6 +855,7 @@ out:
if (ret && clnt_cmd)
GF_FREE (clnt_cmd);
+ loc_wipe (&loc);
return ret;
}
@@ -980,7 +875,7 @@ pump_cmd_start_getxattr_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_local_t *local = NULL;
char *path = NULL;
@@ -1047,6 +942,7 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
uint64_t number_files = 0;
char filename[PATH_MAX];
+ char summary[PATH_MAX+256];
char *dict_str = NULL;
int32_t op_ret = 0;
@@ -1075,16 +971,19 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
}
if (pump_priv->pump_finished) {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Migration complete ",
- number_files);
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64, number_files);
} else {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Current file= %s ",
- number_files, filename);
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64":current_file=%s",
+ number_files, filename);
}
+ snprintf (dict_str, PATH_MAX+256, "status=%d:%s",
+ (pump_priv->pump_finished)?1:0, summary);
dict = dict_new ();
- ret = dict_set_dynstr (dict, PUMP_CMD_STATUS, dict_str);
+ ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"dict_set_dynstr returned negative value");
@@ -1096,13 +995,12 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
out:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
if (dict)
dict_unref (dict);
- if (dict_str)
- GF_FREE (dict_str);
+ GF_FREE (dict_str);
return 0;
}
@@ -1144,14 +1042,14 @@ pump_execute_start (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
STACK_WIND (frame,
pump_cmd_start_getxattr_cbk,
PUMP_SOURCE_CHILD(this),
PUMP_SOURCE_CHILD(this)->fops->getxattr,
&loc,
- PUMP_PATH);
+ PUMP_PATH, NULL);
ret = 0;
@@ -1161,6 +1059,7 @@ out:
pump_command_reply (frame, this);
}
+ loc_wipe (&loc);
return 0;
}
@@ -1168,7 +1067,7 @@ static int
pump_cleanup_helper (void *data) {
call_frame_t *frame = data;
- pump_xattr_cleaner (frame, 0, frame->this, 0, 0);
+ pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
return 0;
}
@@ -1194,14 +1093,6 @@ pump_execute_commit (call_frame_t *frame, xlator_t *this)
pump_priv = priv->pump_private;
local = frame->local;
-
- LOCK (&pump_priv->resume_path_lock);
- {
- pump_priv->number_files_pumped = 0;
- pump_priv->current_file[0] = '\0';
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
local->op_ret = 0;
if (pump_priv->pump_finished) {
pump_change_state (this, PUMP_STATE_COMMIT);
@@ -1258,7 +1149,7 @@ pump_execute_abort (call_frame_t *frame, xlator_t *this)
} else {
pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
pump_xattr_cleaner,
- 0, 0);
+ 0, 0, NULL);
}
return 0;
@@ -1271,7 +1162,7 @@ pump_command_status (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_STATUS, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_STATUS, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump status command");
@@ -1295,7 +1186,7 @@ pump_command_pause (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_PAUSE, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_PAUSE, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump pause command");
@@ -1319,7 +1210,7 @@ pump_command_commit (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_COMMIT, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_COMMIT, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump commit command");
@@ -1343,7 +1234,7 @@ pump_command_abort (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_ABORT, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_ABORT, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump abort command");
@@ -1367,7 +1258,7 @@ pump_command_start (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_START, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_START, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump start command");
@@ -1389,7 +1280,7 @@ struct _xattr_key {
struct list_head list;
};
-static void
+static int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -1401,13 +1292,14 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
static void
@@ -1435,7 +1327,7 @@ __filter_xattrs (dict_t *dict)
int32_t
pump_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -1470,7 +1362,7 @@ pump_getxattr_cbk (call_frame_t *frame, void *cookie,
children[next_call_child],
children[next_call_child]->fops->getxattr,
&local->loc,
- local->cont.getxattr.name);
+ local->cont.getxattr.name, NULL);
}
out:
@@ -1478,7 +1370,7 @@ out:
if (op_ret >= 0 && dict)
__filter_xattrs (dict);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
}
return 0;
@@ -1486,13 +1378,13 @@ out:
int32_t
pump_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
afr_private_t * priv = NULL;
xlator_t ** children = NULL;
int call_child = 0;
afr_local_t *local = NULL;
- int32_t op_ret = -1;
+ int32_t ret = -1;
int32_t op_errno = 0;
uint64_t read_child = 0;
@@ -1505,15 +1397,21 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv->children, out);
children = priv->children;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_getxattr_cbk,
+ FIRST_CHILD (this),
+ (FIRST_CHILD (this))->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
if (name) {
if (!strncmp (name, AFR_XATTR_PREFIX,
@@ -1523,39 +1421,31 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- if (!strcmp (name, PUMP_CMD_STATUS)) {
+ if (!strcmp (name, RB_PUMP_CMD_STATUS)) {
gf_log (this->name, GF_LOG_DEBUG,
"Hit pump command - status");
pump_execute_status (frame, this);
- op_ret = 0;
+ ret = 0;
goto out;
}
}
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_getxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->getxattr,
- loc, name);
- return 0;
- }
-
local->fresh_children = GF_CALLOC (priv->child_count,
sizeof (*local->fresh_children),
gf_afr_mt_int32_t);
- if (local->fresh_children) {
+ if (!local->fresh_children) {
+ ret = -1;
op_errno = ENOMEM;
goto out;
}
read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
+ ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
&local->cont.getxattr.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ if (ret < 0) {
+ op_errno = -ret;
goto out;
}
loc_copy (&local->loc, loc);
@@ -1565,13 +1455,12 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
(void *) (long) call_child,
children[call_child], children[call_child]->fops->getxattr,
- loc, name);
+ loc, name, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1579,11 +1468,9 @@ static int
afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
@@ -1595,14 +1482,14 @@ afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
+ local->op_ret, local->op_errno, NULL);
}
return 0;
}
static int
afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -1654,7 +1541,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1671,7 +1558,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->setxattr,
&local->loc,
local->cont.setxattr.dict,
- local->cont.setxattr.flags);
+ local->cont.setxattr.flags, NULL);
if (!--call_count)
break;
@@ -1699,11 +1586,9 @@ pump_setxattr_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
+ AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -1721,12 +1606,10 @@ pump_command_reply (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_INFO,
"Command succeeded");
- dict_unref (local->dict);
-
AFR_STACK_UNWIND (setxattr,
frame,
local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
return 0;
}
@@ -1763,50 +1646,53 @@ pump_parse_command (call_frame_t *frame, xlator_t *this,
int
pump_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
-
int ret = -1;
-
- int op_ret = -1;
int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (this->private, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict,
+ op_errno, out);
+
priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_setxattr_cbk,
+ FIRST_CHILD (this),
+ (FIRST_CHILD (this))->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+ }
+
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
- ret = AFR_LOCAL_INIT (local, priv);
+ ret = afr_local_init (local, priv, &op_errno);
if (ret < 0) {
- op_errno = -ret;
+ afr_local_cleanup (local, this);
goto out;
- }
+ }
ret = pump_parse_command (frame, this,
local, dict);
if (ret >= 0) {
- op_ret = 0;
+ ret = 0;
goto out;
}
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_setxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->setxattr,
- loc, dict, flags);
- return 0;
- }
-
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
gf_log (this->name, GF_LOG_ERROR,
"Out of memory.");
+ op_errno = ENOMEM;
+ ret = -1;
+ afr_local_cleanup (local, this);
goto out;
}
@@ -1829,12 +1715,12 @@ pump_setxattr (call_frame_t *frame, xlator_t *this,
afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
}
return 0;
@@ -1868,7 +1754,7 @@ static int32_t
pump_truncate (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1878,11 +1764,11 @@ pump_truncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
loc,
- offset);
+ offset, xdata);
return 0;
}
- afr_truncate (frame, this, loc, offset);
+ afr_truncate (frame, this, loc, offset, xdata);
return 0;
}
@@ -1891,7 +1777,7 @@ static int32_t
pump_ftruncate (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1901,11 +1787,11 @@ pump_ftruncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
fd,
- offset);
+ offset, xdata);
return 0;
}
- afr_ftruncate (frame, this, fd, offset);
+ afr_ftruncate (frame, this, fd, offset, xdata);
return 0;
}
@@ -1914,7 +1800,7 @@ pump_ftruncate (call_frame_t *frame,
int
pump_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *parms)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1922,10 +1808,10 @@ pump_mknod (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, parms);
+ loc, mode, rdev, umask, xdata);
return 0;
}
- afr_mknod (frame, this, loc, mode, rdev, parms);
+ afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
return 0;
}
@@ -1934,7 +1820,7 @@ pump_mknod (call_frame_t *frame, xlator_t *this,
int
pump_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1942,10 +1828,10 @@ pump_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
- afr_mkdir (frame, this, loc, mode, params);
+ afr_mkdir (frame, this, loc, mode, umask, xdata);
return 0;
}
@@ -1954,7 +1840,7 @@ pump_mkdir (call_frame_t *frame, xlator_t *this,
static int32_t
pump_unlink (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1963,10 +1849,10 @@ pump_unlink (call_frame_t *frame,
default_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
- afr_unlink (frame, this, loc);
+ afr_unlink (frame, this, loc, xflag, xdata);
return 0;
}
@@ -1974,7 +1860,7 @@ pump_unlink (call_frame_t *frame,
static int
pump_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
@@ -1984,11 +1870,11 @@ pump_rmdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
+ loc, flags, xdata);
return 0;
}
- afr_rmdir (frame, this, loc, flags);
+ afr_rmdir (frame, this, loc, flags, xdata);
return 0;
}
@@ -1997,7 +1883,7 @@ pump_rmdir (call_frame_t *frame, xlator_t *this,
int
pump_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2005,10 +1891,10 @@ pump_symlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
+ linkpath, loc, umask, xdata);
return 0;
}
- afr_symlink (frame, this, linkpath, loc, params);
+ afr_symlink (frame, this, linkpath, loc, umask, xdata);
return 0;
}
@@ -2018,7 +1904,7 @@ static int32_t
pump_rename (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2027,10 +1913,10 @@ pump_rename (call_frame_t *frame,
default_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
- afr_rename (frame, this, oldloc, newloc);
+ afr_rename (frame, this, oldloc, newloc, xdata);
return 0;
}
@@ -2040,7 +1926,7 @@ static int32_t
pump_link (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2049,10 +1935,10 @@ pump_link (call_frame_t *frame,
default_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
- afr_link (frame, this, oldloc, newloc);
+ afr_link (frame, this, oldloc, newloc, xdata);
return 0;
}
@@ -2061,7 +1947,7 @@ pump_link (call_frame_t *frame,
static int32_t
pump_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2069,10 +1955,10 @@ pump_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
- afr_create (frame, this, loc, flags, mode, fd, params);
+ afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
return 0;
}
@@ -2082,8 +1968,7 @@ static int32_t
pump_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd,
- int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2092,10 +1977,10 @@ pump_open (call_frame_t *frame,
default_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
- afr_open (frame, this, loc, flags, fd, wbflags);
+ afr_open (frame, this, loc, flags, fd, xdata);
return 0;
}
@@ -2107,8 +1992,8 @@ pump_writev (call_frame_t *frame,
fd_t *fd,
struct iovec *vector,
int32_t count,
- off_t off,
- struct iobref *iobref)
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2120,20 +2005,20 @@ pump_writev (call_frame_t *frame,
fd,
vector,
count,
- off,
- iobref);
+ off, flags,
+ iobref, xdata);
return 0;
}
- afr_writev (frame, this, fd, vector, count, off, iobref);
- return 0;
+ afr_writev (frame, this, fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
static int32_t
pump_flush (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2142,10 +2027,10 @@ pump_flush (call_frame_t *frame,
default_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
- afr_flush (frame, this, fd);
+ afr_flush (frame, this, fd, xdata);
return 0;
}
@@ -2155,7 +2040,7 @@ static int32_t
pump_fsync (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2165,10 +2050,10 @@ pump_fsync (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
fd,
- flags);
+ flags, xdata);
return 0;
}
- afr_fsync (frame, this, fd, flags);
+ afr_fsync (frame, this, fd, flags, xdata);
return 0;
}
@@ -2177,7 +2062,7 @@ pump_fsync (call_frame_t *frame,
static int32_t
pump_opendir (call_frame_t *frame,
xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2186,10 +2071,10 @@ pump_opendir (call_frame_t *frame,
default_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
- afr_opendir (frame, this, loc, fd);
+ afr_opendir (frame, this, loc, fd, xdata);
return 0;
}
@@ -2199,7 +2084,7 @@ static int32_t
pump_fsyncdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2209,10 +2094,10 @@ pump_fsyncdir (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
fd,
- flags);
+ flags, xdata);
return 0;
}
- afr_fsyncdir (frame, this, fd, flags);
+ afr_fsyncdir (frame, this, fd, flags, xdata);
return 0;
}
@@ -2223,7 +2108,7 @@ pump_xattrop (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2234,10 +2119,10 @@ pump_xattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->xattrop,
loc,
flags,
- dict);
+ dict, xdata);
return 0;
}
- afr_xattrop (frame, this, loc, flags, dict);
+ afr_xattrop (frame, this, loc, flags, dict, xdata);
return 0;
}
@@ -2247,7 +2132,7 @@ pump_fxattrop (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2258,10 +2143,10 @@ pump_fxattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->fxattrop,
fd,
flags,
- dict);
+ dict, xdata);
return 0;
}
- afr_fxattrop (frame, this, fd, flags, dict);
+ afr_fxattrop (frame, this, fd, flags, dict, xdata);
return 0;
}
@@ -2271,9 +2156,17 @@ static int32_t
pump_removexattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
+ name, op_errno, out);
+
+ op_errno = 0;
priv = this->private;
if (!priv->use_afr_in_pump) {
STACK_WIND (frame,
@@ -2281,10 +2174,14 @@ pump_removexattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
loc,
- name);
+ name, xdata);
return 0;
}
- afr_removexattr (frame, this, loc, name);
+ afr_removexattr (frame, this, loc, name, xdata);
+
+ out:
+ if (op_errno)
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -2296,7 +2193,7 @@ pump_readdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t off)
+ off_t off, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2305,21 +2202,18 @@ pump_readdir (call_frame_t *frame,
default_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
+ fd, size, off, xdata);
return 0;
}
- afr_readdir (frame, this, fd, size, off);
+ afr_readdir (frame, this, fd, size, off, xdata);
return 0;
}
static int32_t
-pump_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
+pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *dict)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2328,10 +2222,10 @@ pump_readdirp (call_frame_t *frame,
default_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
+ fd, size, off, dict);
return 0;
}
- afr_readdirp (frame, this, fd, size, off);
+ afr_readdirp (frame, this, fd, size, off, dict);
return 0;
}
@@ -2362,13 +2256,24 @@ pump_release (xlator_t *this,
}
+static int32_t
+pump_forget (xlator_t *this, inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_forget (this, inode);
+
+ return 0;
+}
static int32_t
pump_setattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2377,10 +2282,10 @@ pump_setattr (call_frame_t *frame,
default_setattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
- afr_setattr (frame, this, loc, stbuf, valid);
+ afr_setattr (frame, this, loc, stbuf, valid, xdata);
return 0;
}
@@ -2391,7 +2296,7 @@ pump_fsetattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2400,10 +2305,10 @@ pump_fsetattr (call_frame_t *frame,
default_fsetattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
- afr_fsetattr (frame, this, fd, stbuf, valid);
+ afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
return 0;
}
@@ -2452,7 +2357,7 @@ notify (xlator_t *this, int32_t event,
child_xl = (xlator_t *) data;
- ret = afr_notify (this, event, data);
+ ret = afr_notify (this, event, data, NULL);
switch (event) {
case GF_EVENT_CHILD_DOWN:
@@ -2487,7 +2392,7 @@ init (xlator_t *this)
xlator_list_t * trav = NULL;
int i = 0;
int ret = -1;
- int op_errno = 0;
+ GF_UNUSED int op_errno = 0;
int source_child = 0;
@@ -2503,26 +2408,42 @@ init (xlator_t *this)
"Volume is dangling.");
}
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
priv = this->private;
+ LOCK_INIT (&priv->lock);
+ LOCK_INIT (&priv->read_child_lock);
+ //lock recovery is not done in afr
+ pthread_mutex_init (&priv->mutex, NULL);
+ INIT_LIST_HEAD (&priv->saved_fds);
+
+ child_count = xlator_subvolume_count (this);
+ if (child_count != 2) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "There should be exactly 2 children - one source "
+ "and one sink");
+ return -1;
+ }
+ priv->child_count = child_count;
priv->read_child = source_child;
priv->favorite_child = source_child;
priv->background_self_heal_count = 0;
- priv->data_self_heal = 1;
+ priv->data_self_heal = "on";
priv->metadata_self_heal = 1;
priv->entry_self_heal = 1;
- priv->data_self_heal_algorithm = "";
-
priv->data_self_heal_window_size = 16;
priv->data_change_log = 1;
priv->metadata_change_log = 1;
priv->entry_change_log = 1;
priv->use_afr_in_pump = 1;
+ priv->sh_readdir_size = 65536;
/* Locking options */
@@ -2531,31 +2452,9 @@ init (xlator_t *this)
and the sink.
*/
- priv->data_lock_server_count = 2;
- priv->metadata_lock_server_count = 2;
- priv->entry_lock_server_count = 2;
-
priv->strict_readdir = _gf_false;
- trav = this->children;
- while (trav) {
- child_count++;
- trav = trav->next;
- }
-
priv->wait_count = 1;
-
- if (child_count != 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "There should be exactly 2 children - one source "
- "and one sink");
- return -1;
- }
- priv->child_count = child_count;
-
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
-
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
gf_afr_mt_char);
if (!priv->child_up) {
@@ -2602,6 +2501,12 @@ init (xlator_t *this)
i++;
}
+ ret = gf_asprintf (&priv->sh_domain, "%s-self-heal", this->name);
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
priv->first_lookup = 1;
priv->root_inode = NULL;
@@ -2633,7 +2538,7 @@ init (xlator_t *this)
goto out;
}
- pump_priv->env = syncenv_new (0);
+ pump_priv->env = this->ctx->env;
if (!pump_priv->env) {
gf_log (this->name, GF_LOG_ERROR,
"Could not create new sync-environment");
@@ -2641,10 +2546,16 @@ init (xlator_t *this)
goto out;
}
- priv->pump_private = pump_priv;
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
+ priv->pump_private = pump_priv;
pump_change_state (this, PUMP_STATE_ABORT);
@@ -2656,6 +2567,25 @@ out:
int
fini (xlator_t *this)
{
+ afr_private_t * priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ this->private = NULL;
+ if (!priv)
+ goto out;
+
+ pump_priv = priv->pump_private;
+ if (!pump_priv)
+ goto afr_priv;
+
+ GF_FREE (pump_priv->resume_path);
+ LOCK_DESTROY (&pump_priv->resume_path_lock);
+ LOCK_DESTROY (&pump_priv->pump_state_lock);
+ GF_FREE (pump_priv);
+afr_priv:
+ afr_priv_destroy (priv);
+out:
return 0;
}
@@ -2703,6 +2633,7 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
.release = pump_release,
.releasedir = pump_releasedir,
+ .forget = pump_forget,
};
struct volume_options options[] = {
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
index 027524227..bc4c31a78 100644
--- a/xlators/cluster/afr/src/pump.h
+++ b/xlators/cluster/afr/src/pump.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __PUMP_H__
@@ -26,16 +17,6 @@
#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
-#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
-#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
-#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
-
-#define PUMP_CMD_START "trusted.glusterfs.pump.start"
-#define PUMP_CMD_COMMIT "trusted.glusterfs.pump.commit"
-#define PUMP_CMD_ABORT "trusted.glusterfs.pump.abort"
-#define PUMP_CMD_PAUSE "trusted.glusterfs.pump.pause"
-#define PUMP_CMD_STATUS "trusted.glusterfs.pump.status"
-
#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
@@ -54,7 +35,7 @@ typedef enum {
typedef struct _pump_private {
struct syncenv *env; /* The env pointer to the pump synctask */
- const char *resume_path; /* path to resume from the last pause */
+ char *resume_path; /* path to resume from the last pause */
gf_lock_t resume_path_lock; /* Synchronize resume_path changes */
gf_lock_t pump_state_lock; /* Synchronize pump_state changes */
pump_state_t pump_state; /* State of pump */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index cf883a974..174bea841 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -2,31 +2,33 @@
xlator_LTLIBRARIES = dht.la nufa.la switch.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-
dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
- dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
+ dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
+ dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module -avoidversion
+dht_la_LDFLAGS = -module -avoid-version
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LDFLAGS = -module -avoid-version
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module -avoidversion
+switch_la_LDFLAGS = -module -avoid-version
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = dht-common.h dht-common.c dht-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = dht-common.h dht-mem-types.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
CLEANFILES =
uninstall-local:
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 3ccb89ecc..8f61339e6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -31,17 +22,18 @@
#include "dht-common.h"
#include "defaults.h"
#include "byte-order.h"
+#include "glusterfs-acl.h"
#include <sys/time.h>
#include <libgen.h>
-void
+int
dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
{
dict_t *dst = NULL;
int64_t *ptr = 0, *size = NULL;
int32_t ret = -1;
- data_pair_t *data_pair = NULL;
+ data_t *dict_data = NULL;
dst = data;
@@ -53,32 +45,37 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
if (size == NULL) {
gf_log ("dht", GF_LOG_WARNING,
"memory allocation failed");
- return;
+ return -1;
}
ret = dict_set_bin (dst, key, size, sizeof (int64_t));
if (ret < 0) {
gf_log ("dht", GF_LOG_WARNING,
"dht aggregate dict set failed");
GF_FREE (size);
- return;
+ return -1;
}
}
ptr = data_to_bin (value);
if (ptr == NULL) {
gf_log ("dht", GF_LOG_WARNING, "data to bin failed");
- return;
+ return -1;
}
*size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ ret = gf_get_min_stime (THIS, dst, key, value);
+ if (ret < 0)
+ return ret;
} else {
/* compare user xattrs only */
if (!strncmp (key, "user.", strlen ("user."))) {
- ret = dict_lookup (dst, key, &data_pair);
- if (!ret && data) {
- ret = is_data_equal (data_pair->value, value);
+ ret = dict_lookup (dst, key, &dict_data);
+ if (!ret && dict_data && value) {
+ ret = is_data_equal (dict_data, value);
if (!ret)
- gf_log ("dht", GF_LOG_WARNING,
+ gf_log ("dht", GF_LOG_DEBUG,
"xattr mismatch for %s", key);
}
}
@@ -87,7 +84,7 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed");
}
- return;
+ return 0;
}
@@ -114,7 +111,7 @@ out:
int
dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -134,7 +131,12 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
ret = dht_layout_set (this, local->inode, layout);
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
&local->stbuf, local->xattr, &local->postparent);
@@ -145,12 +147,261 @@ out:
int
+dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = discover_frame->local;
+ layout = local->layout;
+ conf = this->private;
+
+ LOCK(&discover_frame->lock);
+ {
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ }
+ UNLOCK(&discover_frame->lock);
+
+ if (!main_frame)
+ return 0;
+
+ if (local->file_count && local->dir_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (local->cached_subvol) {
+ ret = dht_layout_preset (this, local->cached_subvol,
+ local->inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set layout for subvolume %s",
+ local->cached_subvol ? local->cached_subvol->name : "<nil>");
+ op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = dht_layout_normalize (this, &local->loc, layout);
+ if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+ /* either the layout is incorrect or the directory is
+ * not found even in one subvolume.
+ */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "normalizing failed on %s "
+ "(overlaps/holes present: %s, "
+ "ENOENT errors: %d)", local->loc.path,
+ (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
+ if ((ret > 0) && (ret == conf->subvolume_cnt)) {
+ op_errno = ESTALE;
+ goto out;
+ }
+ }
+
+ if (local->inode)
+ dht_layout_set (this, local->inode, layout);
+ }
+
+ DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+out:
+ DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return ret;
+}
+
+
+int
+dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int attempt_unwind = 0;
+ dht_conf_t *conf = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on %s",
+ local->loc.path, prev->this->name);
+ }
+
+
+ LOCK (&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
+
+ goto unlock;
+ }
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (is_dir) {
+ local->dir_count ++;
+ } else {
+ local->file_count ++;
+
+ if (!is_linkfile) {
+ /* real file */
+ local->cached_subvol = prev->this;
+ attempt_unwind = 1;
+ } else {
+ goto unlock;
+ }
+ }
+
+ local->op_ret = 0;
+
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
+
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt) || attempt_unwind) {
+ dht_discover_complete (this, frame);
+ }
+
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->link_xattr_name);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ uuid_copy (local->gfid, loc->gfid);
+
+ discover_frame = copy_frame (frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ discover_frame->local = local;
+ frame->local = NULL;
+ local->main_frame = frame;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (discover_frame, dht_discover_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+
+int
dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, dict_t *xattr,
struct iatt *postparent)
{
- dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
@@ -164,7 +415,6 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
GF_VALIDATE_OR_GOTO ("dht", this->private, out);
GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- conf = this->private;
local = frame->local;
prev = cookie;
@@ -191,7 +441,7 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_ret, op_errno, xattr);
if (op_ret == -1) {
- local->op_errno = ENOENT;
+ local->op_errno = op_errno;
gf_log (this->name, GF_LOG_DEBUG,
"lookup of %s on %s returned error (%s)",
local->loc.path, prev->this->name,
@@ -250,6 +500,12 @@ unlock:
dht_layout_set (this, local->inode, layout);
}
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
&local->postparent);
@@ -259,6 +515,7 @@ unlock:
selfheal:
FRAME_SU_DO (frame, dht_local_t);
+ uuid_copy (local->loc.gfid, local->gfid);
ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
&local->loc, layout);
out:
@@ -279,6 +536,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
int is_dir = 0;
int is_linkfile = 0;
+ call_frame_t *copy = NULL;
+ dht_local_t *copy_local = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, err);
@@ -305,12 +564,20 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
strerror (op_errno));
}
if (op_errno == ESTALE) {
- /* propogate the ESTALE to parent.
+ /* propagate the ESTALE to parent.
* setting local->return_estale would send
* ESTALE to parent. */
local->return_estale = 1;
}
+ /* if it is ENOENT, we may have to do a
+ * 'lookup_everywhere()' to make sure
+ * the file is not migrated */
+ if (op_errno == ENOENT) {
+ if (IA_ISREG (local->loc.inode->ia_type)) {
+ local->need_lookup_everywhere = 1;
+ }
+ }
goto unlock;
}
@@ -329,7 +596,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
layout = local->layout;
is_dir = check_is_dir (inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
if (is_linkfile) {
gf_log (this->name, GF_LOG_INFO,
@@ -341,6 +609,23 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (is_dir) {
+ ret = dht_dir_has_layout (xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->stbuf.ia_ctime,
+ local->stbuf.ia_ctime_nsec,
+ stbuf->ia_ctime,
+ stbuf->ia_ctime_nsec)) {
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+ }
+ }
+ if (local->stbuf.ia_type != IA_INVAL)
+ {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid)) {
+ local->need_selfheal = 1;
+ }
+ }
ret = dht_layout_dir_mismatch (this, layout,
prev->this, &local->loc,
xattr);
@@ -379,7 +664,28 @@ out:
&& (conf && conf->unhashed_sticky_bit)) {
local->stbuf.ia_prot.sticky = 1;
}
- if (local->layout_mismatch) {
+ if (local->need_selfheal) {
+ local->need_selfheal = 0;
+ uuid_copy (local->gfid, local->stbuf.ia_gfid);
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ copy = create_frame (this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init (copy, &local->loc,
+ NULL, 0);
+ if (!copy_local)
+ goto cont;
+ copy_local->stbuf = local->stbuf;
+ copy->local = copy_local;
+ FRAME_SU_DO (copy, dht_local_t);
+ ret = synctask_new (this->ctx->env,
+ dht_dir_attr_heal,
+ dht_dir_attr_heal_done,
+ copy, copy);
+ }
+ }
+cont:
+ if (local->layout_mismatch) {
/* Found layout mismatch in the directory, need to
fix this in the inode context */
dht_layout_unref (this, local->layout);
@@ -388,13 +694,29 @@ out:
return 0;
}
+ if (local->need_lookup_everywhere) {
+ /* As the current layout gave ENOENT error, we would
+ need a new layout */
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+
+ /* We know that current cached subvol is no more
+ valid, get the new one */
+ local->cached_subvol = NULL;
+ dht_lookup_everywhere (frame, this, &local->loc);
+ return 0;
+ }
if (local->return_estale) {
local->op_ret = -1;
local->op_errno = ESTALE;
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
&local->postparent);
@@ -410,7 +732,8 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -443,9 +766,16 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
local->stbuf.ia_prot.sticky = 1;
}
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+
unwind:
- WIPE (&local->postparent);
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
&local->postparent);
@@ -461,7 +791,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
dht_local_t *local = NULL;
xlator_t *hashed_subvol = NULL;
xlator_t *cached_subvol = NULL;
-
+ dht_layout_t *layout = NULL;
local = frame->local;
hashed_subvol = local->hashed_subvol;
@@ -489,6 +819,46 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
return 0;
}
+ if (local->need_lookup_everywhere) {
+ if (uuid_compare (local->gfid, local->inode->gfid)) {
+ /* GFID different, return error */
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ local->op_ret = 0;
+ local->op_errno = 0;
+ layout = dht_layout_for_subvol (this, cached_subvol);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no pre-set layout for subvolume %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"));
+ }
+
+ ret = dht_layout_set (this, local->inode, layout);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to set layout for subvol %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"));
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_INFO,
"cannot create linkfile file for %s on %s: "
@@ -509,8 +879,12 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, local->xattr,
@@ -524,7 +898,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
hashed_subvol->name);
ret = dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk,
+ dht_lookup_linkfile_create_cbk, this,
cached_subvol, hashed_subvol, &local->loc);
return ret;
@@ -534,7 +908,8 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
int
dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
int this_call_cnt = 0;
@@ -553,7 +928,6 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
inode_t *inode, struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
- dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
@@ -562,7 +936,9 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t *subvol = NULL;
loc_t *loc = NULL;
xlator_t *link_subvol = NULL;
- int ret = -1;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, out);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -570,10 +946,9 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
GF_VALIDATE_OR_GOTO ("dht", cookie, out);
GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- conf = this->private;
-
local = frame->local;
loc = &local->loc;
+ conf = this->private;
prev = cookie;
subvol = prev->this;
@@ -595,7 +970,8 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc->path, prev->this->name);
}
- is_linkfile = check_is_linkfile (inode, buf, xattr);
+ is_linkfile = check_is_linkfile (inode, buf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, buf, xattr);
if (is_linkfile) {
@@ -636,7 +1012,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* This is where we need 'rename' both entries logic */
gf_log (this->name, GF_LOG_WARNING,
"multiple subvolumes (%s and %s) have "
- "file %s (preferrably rename the file "
+ "file %s (preferably rename the file "
"in the backend, and do a fresh lookup)",
local->cached_subvol->name,
subvol->name, local->loc.path);
@@ -647,12 +1023,17 @@ unlock:
UNLOCK (&frame->lock);
if (is_linkfile) {
- gf_log (this->name, GF_LOG_INFO,
- "deleting stale linkfile %s on %s",
- loc->path, subvol->name);
- STACK_WIND (frame, dht_lookup_unlink_cbk,
- subvol, subvol->fops->unlink, loc);
- return 0;
+ ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
+ /* Delete the linkfile only if there are no open fds on it.
+ if there is a open-fd, it may be in migration */
+ if (!ret && (fd_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "deleting stale linkfile %s on %s",
+ loc->path, subvol->name);
+ STACK_WIND (frame, dht_lookup_unlink_cbk,
+ subvol, subvol->fops->unlink, loc, 0, NULL);
+ return 0;
+ }
}
this_call_cnt = dht_frame_return (frame);
@@ -732,7 +1113,16 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) failed (%s)",
local->loc.path, subvol->name, strerror (op_errno));
- goto err;
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
}
if (check_is_dir (inode, stbuf, xattr)) {
@@ -742,7 +1132,7 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
goto err;
}
- if (check_is_linkfile (inode, stbuf, xattr)) {
+ if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) reached link",
local->loc.path, subvol->name);
@@ -770,9 +1160,13 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
op_errno = EINVAL;
}
-unwind:
- WIPE (postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+unwind:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
postparent);
@@ -816,9 +1210,13 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->xattr = NULL;
}
- if (!uuid_is_null (local->gfid))
+ if (!uuid_is_null (local->gfid)) {
ret = dict_set_static_bin (local->xattr_req, "gfid-req",
local->gfid, 16);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set gfid", local->loc.path);
+ }
for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_lookup_dir_cbk,
@@ -849,7 +1247,6 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
call_frame_t *prev = NULL;
int ret = 0;
- uint64_t tmp_layout = 0;
dht_layout_t *parent_layout = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
@@ -880,8 +1277,10 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
(loc->parent)) {
- ret = inode_ctx_get (loc->parent, this, &tmp_layout);
- parent_layout = (dht_layout_t *)(long)tmp_layout;
+ ret = dht_inode_ctx_layout_get (loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout)
+ goto out;
if (parent_layout->search_unhashed) {
local->op_errno = ENOENT;
dht_lookup_everywhere (frame, this, loc);
@@ -910,7 +1309,8 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
if (!is_linkfile) {
/* non-directory and not a linkfile */
@@ -950,14 +1350,51 @@ out:
* from each of the subvolume. See dht_iatt_merge for reference.
*/
- WIPE (postparent);
+ if (!op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
postparent);
err:
return 0;
}
+/* For directories, check if acl xattrs have been requested (by the acl xlator),
+ * if not, request for them. These xattrs are needed for dht dir self-heal to
+ * perform proper self-healing of dirs
+ */
+void
+dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+{
+ int ret = 0;
+
+ GF_ASSERT (inode);
+ GF_ASSERT (xattr_req);
+
+ if (inode->ia_type != IA_IFDIR)
+ return;
+
+ if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
+
+ if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
+
+ return;
+}
int
dht_lookup (call_frame_t *frame, xlator_t *this,
@@ -965,7 +1402,6 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
{
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int ret = -1;
@@ -973,26 +1409,33 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
dht_layout_t *layout = NULL;
int i = 0;
int call_cnt = 0;
-
+ loc_t new_loc = {0,};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
conf = this->private;
if (!conf)
goto err;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- if (!dht_filter_loc_subvol_key (this, loc, &local->loc,
- &hashed_subvol)) {
- ret = loc_dup (loc, &local->loc);
+
+ ret = dht_filter_loc_subvol_key (this, loc, &new_loc,
+ &hashed_subvol);
+ if (ret) {
+ loc_wipe (&local->loc);
+ ret = loc_dup (&new_loc, &local->loc);
+
+ /* we no more need 'new_loc' entries */
+ loc_wipe (&new_loc);
+
+ /* check if loc_dup() is successful */
if (ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_DEBUG,
@@ -1008,16 +1451,19 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
local->xattr_req = dict_new ();
}
+ if (uuid_is_null (loc->pargfid) && !uuid_is_null (loc->gfid) &&
+ !__is_root_gfid (loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_discover (frame, this, loc);
+ return 0;
+ }
+
if (!hashed_subvol)
hashed_subvol = dht_subvol_get_hashed (this, loc);
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
-
- local->cached_subvol = cached_subvol;
local->hashed_subvol = hashed_subvol;
if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
-
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -1037,35 +1483,59 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
goto do_fresh_lookup;
}
- local->inode = inode_ref (loc->inode);
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
+ local->inode = inode_ref (loc->inode);
/* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
* revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
- for (i = 0; i < layout->cnt; i++) {
+ if (IA_ISDIR (local->inode->ia_type)) {
+ local->call_cnt = call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_revalidate_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ call_cnt = local->call_cnt = layout->cnt;
+
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+
+ for (i = 0; i < call_cnt; i++) {
subvol = layout->list[i].xlator;
STACK_WIND (frame, dht_revalidate_cbk,
subvol, subvol->fops->lookup,
&local->loc, local->xattr_req);
- if (!--call_cnt)
- break;
}
} else {
do_fresh_lookup:
/* TODO: remove the hard-coding */
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ conf->link_xattr_name, 256);
+
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -1100,287 +1570,8 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int
-dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- dht_iatt_merge (this, &local->prebuf, prebuf, prev->this);
- dht_iatt_merge (this, &local->stbuf, postbuf, prev->this);
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (truncate, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf);
-err:
- return 0;
-}
-
-
-
-int
-dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- &local->stbuf);
-err:
- return 0;
-}
-
-
-int
-dht_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->stat,
- loc);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;;
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->fstat,
- fd);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->truncate,
- loc, offset);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->ftruncate,
- fd, offset);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
-
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -1388,7 +1579,7 @@ err:
int
dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1412,14 +1603,18 @@ dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent = *postparent;
local->preparent = *preparent;
- WIPE (&local->postparent);
- WIPE (&local->preparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
}
unlock:
UNLOCK (&frame->lock);
DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -1428,7 +1623,7 @@ unlock:
int
dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1440,7 +1635,8 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
- if (op_ret == -1) {
+ if ((op_ret == -1) && !((op_errno == ENOENT) ||
+ (op_errno == ENOTCONN))) {
local->op_errno = op_errno;
gf_log (this->name, GF_LOG_DEBUG,
"subvolume %s returned -1 (%s)",
@@ -1453,7 +1649,7 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unlock:
UNLOCK (&frame->lock);
- if (op_ret == -1)
+ if (local->op_ret == -1)
goto err;
cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
@@ -1467,26 +1663,24 @@ unlock:
STACK_WIND (frame, dht_unlink_cbk,
cached_subvol, cached_subvol->fops->unlink,
- &local->loc);
+ &local->loc, local->flags, NULL);
return 0;
err:
DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
-
int
-dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf)
+dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
-
local = frame->local;
prev = cookie;
@@ -1506,293 +1700,274 @@ unlock:
UNLOCK (&frame->lock);
this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- prebuf, postbuf);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
return 0;
}
-
-
-int
-dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+static void
+fill_layout_info (dht_layout_t *layout, char *buf)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ int i = 0;
+ char tmp_buf[128] = {0,};
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, local->op_errno);
+ for (i = 0; i < layout->cnt; i++) {
+ snprintf (tmp_buf, 128, "(%s %u %u)",
+ layout->list[i].xlator->name,
+ layout->list[i].start,
+ layout->list[i].stop);
+ if (i)
+ strcat (buf, " ");
+ strcat (buf, tmp_buf);
}
-
- return 0;
}
-
-int
-dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+void
+dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
+ char *xattr_buf, int32_t alloc_len,
+ int flag, char *layout_buf)
{
- DHT_STACK_UNWIND (access, frame, op_ret, op_errno);
- return 0;
+ if (flag && local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name,
+ layout_buf);
+ else if (local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "(<"DHT_PATHINFO_HEADER"%s> %s)",
+ this->name, local->xattr_val);
+ else if (flag)
+ snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
+ this->name, layout_buf);
}
-
int
-dht_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ int ret = -1;
+ char *value = NULL;
+ int32_t plen = 0;
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ ret = dict_get_str (xattr, local->xsel, &value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Subvolume %s returned -1 (%s)", this->name,
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
}
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_access_cbk,
- subvol, subvol->fops->access,
- loc, mask);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (access, frame, -1, op_errno);
-
- return 0;
-}
-
+ local->alloc_len += strlen(value);
-int
-dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path, struct iatt *sbuf)
-{
- dht_local_t *local = NULL;
+ if (!local->xattr_val) {
+ local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
- local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (local->xattr_val) {
+ plen = strlen (local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC (local->xattr_val,
+ local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
+ (void) strcat (local->xattr_val, value);
+ (void) strcat (local->xattr_val, " ");
+ local->op_ret = 0;
}
-err:
- DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf);
+ ret = 0;
- return 0;
+ out:
+ return ret;
}
-
int
-dht_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {0,};
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if (flag)
+ fill_layout_info (local->layout, layout_buf);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND (frame, dht_readlink_cbk,
- subvol, subvol->fops->readlink,
- loc, size);
-
- return 0;
+ *dict = dict_new ();
+ if (!*dict)
+ goto out;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
+ local->xattr_val[strlen (local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we dont have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen (this->name))
+ + strlen (layout_buf)
+ + 40;
+ xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
- return 0;
-}
+ if (XATTR_IS_PATHINFO (local->xsel)) {
+ (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
+ local->alloc_len, flag,
+ layout_buf);
+ } else if (XATTR_IS_NODE_UUID (local->xsel)) {
+ (void) snprintf (xattr_buf, local->alloc_len, "%s",
+ local->xattr_val);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown local->xsel (%s)", local->xsel);
+ goto out;
+ }
+ ret = dict_set_dynstr (*dict, local->xsel, xattr_buf);
+ GF_FREE (local->xattr_val);
-static void
-fill_layout_info (dht_layout_t *layout, char *buf)
-{
- int i = 0;
- char tmp_buf[128] = {0,};
-
- for (i = 0; i < layout->cnt; i++) {
- snprintf (tmp_buf, 128, "(%s %u %u)",
- layout->list[i].xlator->name,
- layout->list[i].start,
- layout->list[i].stop);
- if (i)
- strcat (buf, " ");
- strcat (buf, tmp_buf);
- }
+ out:
+ return ret;
}
int
-dht_pathinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- dht_local_t *local = NULL;
int ret = 0;
- int flag = 0;
+ dht_local_t *local = NULL;
int this_call_cnt = 0;
- char *value_got = NULL;
- char layout_buf[8192] = {0,};
- char *xattr_buf = NULL;
dict_t *dict = NULL;
- int32_t alloc_len = 0;
- int32_t plen = 0;
- local = frame->local;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
- if (op_ret != -1) {
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value_got);
- if (!ret) {
- alloc_len = strlen (value_got);
+ local = frame->local;
- /**
- * allocate the buffer:- we allocate 10 bytes extra in case we need to
- * append ' Link: ' in the buffer for another STACK_WIND
- */
- if (!local->pathinfo) {
- alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
- local->pathinfo = GF_CALLOC (alloc_len, sizeof (char), gf_common_mt_char);
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ if (op_errno != ENOTCONN) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr err (%s) for dir",
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
}
- if (local->pathinfo) {
- plen = strlen (local->pathinfo);
- if (plen) {
- alloc_len += plen;
- local->pathinfo = GF_REALLOC (local->pathinfo,
- alloc_len);
- if (!local->pathinfo)
- goto out;
- }
-
- strcat (local->pathinfo, value_got);
- }
+ goto unlock;
}
+
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
}
+ unlock:
+ UNLOCK (&frame->lock);
- out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->layout->cnt > 1) {
- /* Set it for directory */
- fill_layout_info (local->layout, layout_buf);
- flag = 1;
- }
+ if (!is_last_call (this_call_cnt))
+ goto out;
- dict = dict_new ();
+ /* -- last call: do patch ups -- */
- /* we would need max-to-max this many bytes to create pathinfo string */
- alloc_len += (2 * strlen (this->name)) + strlen (layout_buf) + 40;
- xattr_buf = GF_CALLOC (alloc_len, sizeof (char), gf_common_mt_char);
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- if (flag && local->pathinfo)
- snprintf (xattr_buf, alloc_len, "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
- this->name, local->pathinfo, this->name,
- layout_buf);
- else if (local->pathinfo)
- snprintf (xattr_buf, alloc_len, "(<"DHT_PATHINFO_HEADER"%s> %s)",
- this->name, local->pathinfo);
- else if (flag)
- snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
- this->name, layout_buf);
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
- ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY,
- xattr_buf);
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
- if (local->pathinfo)
- GF_FREE (local->pathinfo);
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
+ out:
+ return 0;
+}
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+int
+dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ call_frame_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
- if (dict)
- dict_unref (dict);
+ local = frame->local;
+ prev = cookie;
- return 0;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 "
+ "(%s)", prev->this->name, strerror (op_errno));
+ goto unwind;
}
- if (local->pathinfo)
- strcat (local->pathinfo, " Link: ");
- if (local->hashed_subvol) {
- /* This will happen if there pending */
- STACK_WIND (frame, dht_pathinfo_getxattr_cbk, local->hashed_subvol,
- local->hashed_subvol->fops->getxattr,
- &local->loc, local->key);
-
- return 0;
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
+ goto unwind;
}
- gf_log ("this->name", GF_LOG_ERROR, "Unable to find hashed_subvol for path"
- " %s", local->pathinfo);
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
+
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
+
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno,
+ NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, dict);
return 0;
}
int
dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
int ret = 0;
char *value = NULL;
@@ -1807,21 +1982,24 @@ dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
return 0;
}
int
dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
int this_call_cnt = 0;
dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ conf = this->private;
local = frame->local;
this_call_cnt = dht_frame_return (frame);
@@ -1829,8 +2007,8 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!xattr || (op_ret == -1))
goto out;
- if (dict_get (xattr, "trusted.glusterfs.dht")) {
- dict_del (xattr, "trusted.glusterfs.dht");
+ if (dict_get (xattr, conf->xattr_name)) {
+ dict_del (xattr, conf->xattr_name);
}
local->op_ret = 0;
@@ -1838,30 +2016,97 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->xattr = dict_copy_with_ref (xattr, NULL);
} else {
/* first aggregate everything into xattr and then copy into
- * local->xattr.
+ * local->xattr. This is required as we want to have
+ * 'local->xattr' as the proper final dictionary passed above
+ * distribute xlator.
*/
dht_aggregate_xattr (xattr, local->xattr);
+ local->xattr = dict_copy (xattr, local->xattr);
}
out:
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, local->xattr);
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, NULL);
}
return 0;
}
int32_t
dht_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
+dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+
+
+ local = frame->local;
+
+ if (op_ret != -1) {
+ if (local->xattr)
+ dict_unref (local->xattr);
+ local->xattr = dict_ref (xattr);
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+ local->xattr_req = dict_ref (xdata);
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, local->xattr_req);
+ }
+
+ return 0;
+}
+
+
+int
+dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
+
+
+ local = frame->local;
+ layout = local->layout;
+
+ cnt = local->call_cnt = layout->cnt;
+
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
+ }
+
+ return 0;
+}
+
+
+int
dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+ loc_t *loc, const char *key, dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
{
+
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
xlator_t *cached_subvol = NULL;
@@ -1870,7 +2115,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
dht_layout_t *layout = NULL;
xlator_t **sub_volumes = NULL;
int op_errno = -1;
- int ret = 0;
int i = 0;
int cnt = 0;
@@ -1878,57 +2122,94 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
VALIDATE_OR_GOTO (this->private, err);
conf = this->private;
- layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "layout is NULL");
- op_errno = ENOENT;
- goto err;
- }
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
goto err;
}
- local->layout = layout;
-
- if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) {
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (key) {
local->key = gf_strdup (key);
if (!local->key) {
op_errno = ENOMEM;
-
goto err;
}
+ }
- local->call_cnt = 1;
- if (hashed_subvol != cached_subvol) {
- local->call_cnt = 2;
- local->hashed_subvol = hashed_subvol;
+ if (key &&
+ (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
+ && DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
+ return 0;
+ }
+
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
+ if (key && DHT_IS_DIR(layout) &&
+ ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)
+ || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) {
+ (void) strncpy (local->xsel, key, 256);
+ cnt = local->call_cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_vgetxattr_dir_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, NULL);
}
+ return 0;
+ }
+
+ /* node-uuid or pathinfo for files */
+ if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)
+ || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) {
+ cached_subvol = local->cached_subvol;
+ (void) strncpy (local->xsel, key, 256);
- STACK_WIND (frame, dht_pathinfo_getxattr_cbk, cached_subvol,
- cached_subvol->fops->getxattr, loc, key);
+ local->call_cnt = 1;
+ STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol->fops->getxattr, loc, key, NULL);
return 0;
}
+
if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "cached subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
if (hashed_subvol == cached_subvol) {
op_errno = ENODATA;
goto err;
@@ -1936,7 +2217,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
if (hashed_subvol) {
STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
hashed_subvol->fops->getxattr, loc,
- GF_XATTR_PATHINFO_KEY);
+ GF_XATTR_PATHINFO_KEY, NULL);
return 0;
}
op_errno = ENODATA;
@@ -1944,13 +2225,13 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
if (key && (!strcmp (GF_XATTR_MARKER_KEY, key))
- && (-1 == frame->root->pid)) {
-
- if (loc->inode-> ia_type == IA_IFDIR) {
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
cnt = layout->cnt;
} else {
cnt = 1;
}
+
sub_volumes = alloca ( cnt * sizeof (xlator_t *));
for (i = 0; i < cnt; i++)
*(sub_volumes + i) = layout->list[i].xlator;
@@ -1958,7 +2239,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
if (cluster_getmarkerattr (frame, this, loc, key,
local, dht_getxattr_unwind,
sub_volumes, cnt,
- MARKER_UUID_TYPE, conf->vol_uuid)) {
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ conf->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
@@ -1968,8 +2250,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
if (key && *conf->vol_uuid) {
if ((match_uuid_local (key, conf->vol_uuid) == 0) &&
- (-1 == frame->root->pid)) {
- if (loc->inode-> ia_type == IA_IFDIR) {
+ (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
cnt = layout->cnt;
} else {
cnt = 1;
@@ -1982,6 +2264,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
local, dht_getxattr_unwind,
sub_volumes, cnt,
MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
conf->vol_uuid)) {
op_errno = EINVAL;
goto err;
@@ -1991,6 +2274,60 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
}
+ if (DHT_IS_DIR(layout)) {
+ cnt = local->call_cnt = layout->cnt;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, NULL);
+ }
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+#undef DHT_IS_DIR
+
+int
+dht_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
if (key) {
local->key = gf_strdup (key);
if (!local->key) {
@@ -1999,7 +2336,9 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- if (loc->inode-> ia_type == IA_IFDIR) {
+ if ((fd->inode->ia_type == IA_IFDIR)
+ && (strncmp (key, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY) != 0))) {
cnt = local->call_cnt = layout->cnt;
} else {
cnt = local->call_cnt = 1;
@@ -2008,32 +2347,45 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
for (i = 0; i < cnt; i++) {
subvol = layout->list[i].xlator;
STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->getxattr,
- loc, key);
+ subvol, subvol->fops->fgetxattr,
+ fd, key, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xattr, int flags)
+ fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
+ conf = this->private;
+
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for fd=%p", fd);
@@ -2041,23 +2393,16 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
local->call_cnt = 1;
STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
- fd, xattr, flags);
+ fd, xattr, flags, NULL);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -2065,16 +2410,58 @@ err:
static int
dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, /* pass-through callback: relays op_ret/op_errno to the setxattr caller unchanged */
+ dict_t *xdata) /* xdata is handed to the unwind exactly as received */
{
- DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); /* no local state is consulted; the reply is forwarded as-is */
return 0;
}
+/* pathinfo getxattr callback for the "decommission-brick" setxattr: flags
+ * prev->this as decommissioned when its pathinfo matches local->key. */
int
+dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int op_ret, int op_errno, dict_t *xattr,
+                           dict_t *xdata)
+{
+        int           i             = -1;
+        int           ret           = -1;
+        char         *value         = NULL;
+        dht_local_t  *local         = NULL;
+        dht_conf_t   *conf          = NULL;
+        call_frame_t *prev          = NULL;
+        int           this_call_cnt = 0;
+
+        local = frame->local;
+        prev  = cookie;
+        conf  = this->private;
+
+        /* a failed reply still has to be counted, so fall through to out */
+        if (op_ret == -1)
+                goto out;
+
+        ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+        if (ret)
+                goto out;
+
+        /* local->key is the result of an unchecked gf_strdup() in
+         * dht_setxattr and may therefore be NULL; never pass it to
+         * strcmp() without checking */
+        if (local->key && !strcmp (value, local->key)) {
+                for (i = 0; i < conf->subvolume_cnt; i++) {
+                        if (conf->subvolumes[i] == prev->this)
+                                conf->decommissioned_bricks[i] = prev->this;
+                }
+        }
+
+out:
+        this_call_cnt = dht_frame_return (frame);
+        if (is_last_call (this_call_cnt)) {
+                /* NOTE(review): errno is always ENOTSUP and local->op_ret is
+                 * never updated by this callback -- confirm this is intended */
+                DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
+        }
+        return 0;
+}
+
+int
dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags)
+ loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
@@ -2086,17 +2473,26 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
data_t *tmp = NULL;
uint32_t dir_spread = 0;
char value[4096] = {0,};
- int forced_rebalance = 0;
-
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
+ int call_cnt = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
conf = this->private;
- subvol = dht_subvol_get_cached (this, loc->inode);
+
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for path=%s", loc->path);
@@ -2104,13 +2500,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"no layout for path=%s", loc->path);
@@ -2118,15 +2508,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
+ local->call_cnt = call_cnt = layout->cnt;
tmp = dict_get (xattr, "distribute.migrate-data");
if (tmp) {
- if (!IA_ISREG (loc->inode->ia_type)) {
+ if (IA_ISDIR (loc->inode->ia_type)) {
op_errno = ENOTSUP;
goto err;
}
@@ -2135,15 +2521,27 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
(ie, 'target' subvolume given there, etc) */
memcpy (value, tmp->data, tmp->len);
if (strcmp (value, "force") == 0)
- forced_rebalance = 1;
+ forced_rebalance =
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
- local->to_subvol = dht_subvol_get_hashed (this, loc);
- local->from_subvol = dht_subvol_get_cached (this, loc->inode);
- if (local->to_subvol == local->from_subvol) {
- op_errno = ENOTSUP;
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
+
+ local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
+ if (!local->rebalance.target_node) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
goto err;
}
- if (local->to_subvol) {
+
+ local->rebalance.from_subvol = local->cached_subvol;
+
+ if (local->rebalance.target_node == local->rebalance.from_subvol) {
+ op_errno = EEXIST;
+ goto err;
+ }
+ if (local->rebalance.target_node) {
local->flags = forced_rebalance;
ret = dht_start_rebalance_task (this, frame);
@@ -2154,19 +2552,45 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
"%s: failed to create a new synctask",
loc->path);
}
- op_errno = ENOTSUP;
+ op_errno = EINVAL;
goto err;
}
+ tmp = dict_get (xattr, "decommission-brick");
+ if (tmp) {
+ /* This operation should happen only on '/' */
+ if (!__is_root_gfid (loc->inode->gfid)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095));
+ local->key = gf_strdup (value);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0 ; i < conf->subvolume_cnt; i++) {
+ /* Get the pathinfo, and then compare */
+ STACK_WIND (frame, dht_checking_pathinfo_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr,
+ loc, GF_XATTR_PATHINFO_KEY, NULL);
+ }
+ return 0;
+ }
+
tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
if (tmp) {
gf_log (this->name, GF_LOG_INFO,
"fixing the layout of %s", loc->path);
- dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
- layout);
- return 0;
+ ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
tmp = dict_get (xattr, "distribute.directory-spread-count");
@@ -2178,10 +2602,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
(dir_spread > 0))) {
layout->spread_cnt = dir_spread;
- dht_fix_directory_layout (frame,
- dht_common_setxattr_cbk,
- layout);
- return 0;
+ ret = dht_fix_directory_layout (frame,
+ dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
gf_log (this->name, GF_LOG_ERROR,
"wrong 'directory-spread-count' value (%s)", value);
@@ -2189,20 +2617,18 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
+ for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_err_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags);
+ loc, xattr, flags, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setxattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -2210,7 +2636,7 @@ err:
int
dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -2236,7 +2662,8 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (removexattr, frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
+ local->op_errno, NULL);
}
return 0;
@@ -2245,22 +2672,35 @@ unlock:
int
dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+ loc_t *loc, const char *key, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for path=%s", loc->path);
@@ -2268,13 +2708,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
+ layout = local->layout;
if (!local->layout) {
gf_log (this->name, GF_LOG_DEBUG,
"no layout for path=%s", loc->path);
@@ -2282,368 +2716,120 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- local->call_cnt = layout->cnt;
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
- for (i = 0; i < layout->cnt; i++) {
+ for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_removexattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->removexattr,
- loc, key);
+ loc, key, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (removexattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
-
int
-dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
-
- return 0;
-}
-
-
-int
-dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd, int wbflags)
+dht_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
{
xlator_t *subvol = NULL;
- int ret = -1;
int op_errno = -1;
dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+ int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_fd_cbk,
- subvol, subvol->fops->open,
- loc, flags, fd, wbflags);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref)
-{
- dht_local_t *local = frame->local;
-
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
-out:
- DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
+ VALIDATE_OR_GOTO (this->private, err);
- return 0;
-}
+ conf = this->private;
-
-int
-dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- STACK_WIND (frame, dht_readv_cbk,
- subvol, subvol->fops->readv,
- fd, size, off);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- dht_local_t *local = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
-out:
- DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
-
- return 0;
-}
-
-
-int
-dht_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int count, off_t off,
- struct iobref *iobref)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
+ "no cached subvolume for inode=%s",
+ uuid_utoa (fd->inode->gfid));
op_errno = EINVAL;
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
-
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- fd, vector, count, off, iobref);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
+ layout = local->layout;
+ if (!local->layout) {
gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
+ "no layout for inode=%s", uuid_utoa (fd->inode->gfid));
op_errno = EINVAL;
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
- goto err;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fremovexattr,
+ fd, key, NULL);
}
- local->fd = fd_ref (fd);
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->flush, fd);
-
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (flush, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
return 0;
}
int
-dht_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync)
+dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_fsync_cbk,
- subvol, subvol->fops->fsync,
- fd, datasync);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct gf_flock *flock)
-{
- DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock);
-
- return 0;
-}
-
-
-int
-dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct gf_flock *flock)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ local->op_ret = 0;
}
+unlock:
+ UNLOCK (&frame->lock);
- STACK_WIND (frame, dht_lk_cbk,
- subvol, subvol->fops->lk,
- fd, cmd, flock);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
+ local->fd, NULL);
return 0;
}
@@ -2673,7 +2859,7 @@ dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
int
dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -2719,14 +2905,14 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs);
+ &local->statvfs, xdata);
return 0;
}
int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
@@ -2738,12 +2924,11 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
VALIDATE_OR_GOTO (this->private, err);
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2755,7 +2940,8 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND (frame, dht_statfs_cbk,
conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc);
+ conf->subvolumes[i]->fops->statfs, loc,
+ xdata);
}
return 0;
}
@@ -2771,28 +2957,27 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_cnt = 1;
STACK_WIND (frame, dht_statfs_cbk,
- subvol, subvol->fops->statfs, loc);
+ subvol, subvol->fops->statfs, loc, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
- int ret = -1;
int op_errno = -1;
int i = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -2800,35 +2985,27 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
local->call_cnt = conf->subvolume_cnt;
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND (frame, dht_fd_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2836,7 +3013,7 @@ err:
int
dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries)
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
dht_local_t *local = NULL;
gf_dirent_t entries;
@@ -2849,6 +3026,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dht_layout_t *layout = 0;
dht_conf_t *conf = NULL;
xlator_t *subvol = 0;
+ int ret = 0;
INIT_LIST_HEAD (&entries.list);
prev = cookie;
@@ -2865,10 +3043,13 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
-
- if (check_is_linkfile (NULL, (&orig_entry->d_stat), NULL)
- || (check_is_dir (NULL, (&orig_entry->d_stat), NULL)
- && (prev->this != dht_first_up_subvol (this)))) {
+ if (check_is_dir (NULL, (&orig_entry->d_stat), NULL) &&
+ (prev->this != local->first_up_subvol)) {
+ continue;
+ }
+ if (check_is_linkfile (NULL, (&orig_entry->d_stat),
+ orig_entry->dict,
+ conf->link_xattr_name)) {
continue;
}
@@ -2884,7 +3065,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
orig_entry->d_name);
if (!subvol || (subvol != prev->this)) {
/* TODO: Count the number of entries which need
- linkfile to prove its existance in fs */
+ linkfile to prove its existence in fs */
layout->search_unhashed++;
}
}
@@ -2897,6 +3078,24 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
+ if (orig_entry->dict)
+ entry->dict = dict_ref (orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (!IA_ISDIR(entry->d_stat.ia_type)) {
+ ret = dht_layout_preset (this, prev->this,
+ orig_entry->inode);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to link the layout in inode");
+ entry->inode = inode_ref (orig_entry->inode);
+ } else if (orig_entry->inode) {
+ dht_inode_ctx_time_update (orig_entry->inode, this,
+ &entry->d_stat, 1);
+ }
+
list_add_tail (&entry->list, &entries.list);
count++;
}
@@ -2926,9 +3125,23 @@ done:
goto unwind;
}
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
+
STACK_WIND (frame, dht_readdirp_cbk,
next_subvol, next_subvol->fops->readdirp,
- local->fd, local->size, next_offset);
+ local->fd, local->size, next_offset,
+ local->xattr);
return 0;
}
@@ -2936,7 +3149,7 @@ unwind:
if (op_ret < 0)
op_ret = 0;
- DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
gf_dirent_free (&entries);
@@ -2947,7 +3160,8 @@ unwind:
int
dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
+ int op_ret, int op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
gf_dirent_t entries;
@@ -2958,13 +3172,11 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
off_t next_offset = 0;
int count = 0;
dht_layout_t *layout = 0;
- dht_conf_t *conf = NULL;
xlator_t *subvol = 0;
INIT_LIST_HEAD (&entries.list);
prev = cookie;
local = frame->local;
- conf = this->private;
if (op_ret < 0)
goto done;
@@ -3026,7 +3238,7 @@ done:
STACK_WIND (frame, dht_readdir_cbk,
next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset);
+ local->fd, local->size, next_offset, NULL);
return 0;
}
@@ -3034,7 +3246,7 @@ unwind:
if (op_ret < 0)
op_ret = 0;
- DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
gf_dirent_free (&entries);
@@ -3044,46 +3256,76 @@ unwind:
int
dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop)
+ off_t yoff, int whichop, dict_t *dict) /* shared entry for readdir/readdirp; whichop selects the fop wound below, dict may be NULL */
{
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
int op_errno = -1;
xlator_t *xvol = NULL;
off_t xoff = 0;
-
+ int ret = 0;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err); /* conf fields are dereferenced on the readdirp path */
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, NULL, NULL, whichop); /* no loc/fd handed to init; fd is attached explicitly below */
if (!local) {
-
op_errno = ENOMEM;
goto err;
}
local->fd = fd_ref (fd);
local->size = size;
+ local->xattr_req = (dict)? dict_ref (dict) : NULL; /* take a ref on the caller's dict only when one was supplied */
+ local->first_up_subvol = dht_first_up_subvol (this); /* stored on local; compared against xvol below */
dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
/* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIR)
- STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, xoff);
- else
+ if (whichop == GF_FOP_READDIRP) { /* readdirp: prepare the dict that is wound with the request */
+ if (dict)
+ local->xattr = dict_ref (dict); /* reuse the caller's dict; the keys set below land in it */
+ else
+ local->xattr = dict_new (); /* result is NULL-checked right below */
+
+ if (local->xattr) {
+ ret = dict_set_uint32 (local->xattr,
+ conf->link_xattr_name, 256); /* NOTE(review): meaning of 256 is not visible here -- confirm */
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set '%s' key",
+ conf->link_xattr_name); /* failure is logged only; the wind still happens */
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) { /* set the skip-dirs key only when xvol is not the first-up subvolume */
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "Dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS); /* xvol is the first-up subvolume: make sure the key is absent */
+ }
+ }
+ }
+
STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, xoff);
+ fd, size, xoff, local->xattr);
+ } else {
+ STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
+ fd, size, xoff, local->xattr); /* local->xattr is not assigned by this function on the readdir path */
+ }
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); /* NOTE(review): unwinds as readdir even when whichop is GF_FOP_READDIRP */
return 0;
}
@@ -3091,7 +3333,7 @@ err:
int
dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
+ off_t yoff, dict_t *xdata)
{
int op = GF_FOP_READDIR;
dht_conf_t *conf = NULL;
@@ -3112,15 +3354,15 @@ dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
op = GF_FOP_READDIRP;
out:
- dht_do_readdir (frame, this, fd, size, yoff, op);
+ dht_do_readdir (frame, this, fd, size, yoff, op, 0);
return 0;
}
int
dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
+ off_t yoff, dict_t *dict) /* readdirp entry point: thin wrapper over dht_do_readdir; dict is the caller's request dict */
{
- dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP);
+ dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); /* dict is forwarded untouched; the helper's return value is ignored */
return 0;
}
@@ -3128,7 +3370,7 @@ dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int
dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -3148,21 +3390,22 @@ dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
return 0;
}
int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int datasync, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int op_errno = -1;
int i = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -3170,10 +3413,9 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);
if (!local) {
op_errno = ENOMEM;
-
goto err;
}
@@ -3184,14 +3426,14 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
STACK_WIND (frame, dht_fsyncdir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
@@ -3201,9 +3443,9 @@ int
dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
+ xlator_t *prev = NULL;
int ret = -1;
dht_local_t *local = NULL;
@@ -3221,19 +3463,24 @@ dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
}
- ret = dht_layout_preset (this, prev->this, inode);
+ ret = dht_layout_preset (this, prev, inode);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"could not set pre-set layout for subvolume %s",
- prev->this->name);
+ prev? prev->name: NULL);
op_ret = -1;
op_errno = EINVAL;
goto out;
}
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
out:
/*
* FIXME: ia_size and st_blocks of preparent and postparent do not have
@@ -3242,9 +3489,9 @@ out:
* corresponding values from each of the subvolume.
* See dht_iatt_merge for reference.
*/
-
- DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
return 0;
}
@@ -3253,7 +3500,8 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -3262,42 +3510,43 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
goto err;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
cached_subvol = local->cached_subvol;
- STACK_WIND (frame, dht_newfile_cbk,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev,
- local->params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask,
+ local->params);
return 0;
err:
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
int
dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
xlator_t *subvol = NULL;
int op_errno = -1;
- int ret = -1;
xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
-
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
if (!local) {
op_errno = ENOMEM;
-
goto err;
}
@@ -3310,22 +3559,17 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
if (!dht_is_subvol_filled (this, subvol)) {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
+ subvol, subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
} else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
+
+ avail_subvol = dht_free_disk_available_subvol (this, subvol,
+ local);
if (avail_subvol != subvol) {
/* Choose the minimum filled volume, and create the
files there */
@@ -3334,17 +3578,18 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
local->cached_subvol = avail_subvol;
local->mode = mode;
local->rdev = rdev;
-
+ local->umask = umask;
dht_linkfile_create (frame,
dht_mknod_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ this, avail_subvol, subvol, loc);
} else {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
}
}
@@ -3353,7 +3598,7 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -3361,22 +3606,19 @@ err:
int
dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc, dict_t *params)
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);
if (!local) {
op_errno = ENOMEM;
-
goto err;
}
@@ -3389,41 +3631,33 @@ dht_symlink (call_frame_t *frame, xlator_t *this,
goto err;
}
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "Failed to copy loc");
- op_errno = ENOMEM;
- goto err;
- }
-
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->symlink,
- linkname, loc, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
xlator_t *cached_subvol = NULL;
xlator_t *hashed_subvol = NULL;
- int ret = -1;
int op_errno = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
@@ -3435,15 +3669,14 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->loc.path, cached_subvol->name, loc->path);
STACK_WIND (frame, dht_unlink_cbk,
cached_subvol, cached_subvol->fops->unlink,
- &local->loc);
+ &local->loc, xflag, xdata);
goto done;
}
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
goto err;
}
@@ -3456,32 +3689,29 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
goto err;
}
+ local->flags = xflag;
if (hashed_subvol != cached_subvol) {
STACK_WIND (frame, dht_unlink_linkfile_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc);
+ hashed_subvol, hashed_subvol->fops->unlink, loc,
+ xflag, xdata);
} else {
STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc);
+ cached_subvol, cached_subvol->fops->unlink, loc,
+ xflag, xdata);
}
done:
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -3491,13 +3721,14 @@ int
dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
call_frame_t *prev = NULL;
dht_layout_t *layout = NULL;
dht_local_t *local = NULL;
prev = cookie;
+
local = frame->local;
if (op_ret == -1)
@@ -3513,12 +3744,20 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- WIPE (preparent);
- WIPE (postparent);
-
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -3528,27 +3767,27 @@ int
dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *srcvol = NULL;
-
if (op_ret == -1)
goto err;
local = frame->local;
srcvol = local->linkfile.srcvol;
- STACK_WIND (frame, dht_link_cbk,
- srcvol, srcvol->fops->link,
- &local->loc, &local->loc2);
+ STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
+ &local->loc, &local->loc2, xdata);
return 0;
err:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -3556,7 +3795,7 @@ err:
int
dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_t *cached_subvol = NULL;
xlator_t *hashed_subvol = NULL;
@@ -3564,13 +3803,19 @@ dht_link (call_frame_t *frame, xlator_t *this,
int ret = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (oldloc, err);
VALIDATE_OR_GOTO (newloc, err);
- cached_subvol = dht_subvol_get_cached (this, oldloc->inode);
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
if (!cached_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for path=%s", oldloc->path);
@@ -3587,42 +3832,27 @@ dht_link (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
ret = loc_copy (&local->loc2, newloc);
if (ret == -1) {
op_errno = ENOMEM;
-
goto err;
}
if (hashed_subvol != cached_subvol) {
- memcpy (local->gfid, oldloc->inode->gfid, 16);
- dht_linkfile_create (frame, dht_link_linkfile_cbk,
+ uuid_copy (local->gfid, oldloc->inode->gfid);
+ dht_linkfile_create (frame, dht_link_linkfile_cbk, this,
cached_subvol, hashed_subvol, newloc);
} else {
STACK_WIND (frame, dht_link_cbk,
cached_subvol, cached_subvol->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -3632,7 +3862,7 @@ int
dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
call_frame_t *prev = NULL;
int ret = -1;
@@ -3651,8 +3881,11 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
}
ret = dht_layout_preset (this, prev->this, inode);
@@ -3664,10 +3897,14 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
goto out;
}
-
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -3677,7 +3914,8 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -3691,23 +3929,22 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
cached_subvol, cached_subvol->fops->create,
&local->loc, local->flags, local->mode,
- local->fd, local->params);
+ local->umask, local->fd, local->params);
return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
int
dht_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
int op_errno = -1;
- int ret = -1;
xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
xlator_t *avail_subvol = NULL;
@@ -3715,13 +3952,10 @@ dht_create (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
-
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
if (!local) {
-
op_errno = ENOMEM;
goto err;
}
@@ -3733,16 +3967,10 @@ dht_create (call_frame_t *frame, xlator_t *this,
local->loc.path, subvol->name, loc->path);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- &local->loc, flags, mode, fd, params);
+ &local->loc, flags, mode, umask, fd, params);
goto done;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
subvol = dht_subvol_get_hashed (this, loc);
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -3757,40 +3985,38 @@ dht_create (call_frame_t *frame, xlator_t *this,
"creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
goto done;
}
/* Choose the minimum filled volume, and create the
files there */
- /* TODO */
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
if (avail_subvol != subvol) {
- local->fd = fd_ref (fd);
local->params = dict_ref (params);
local->flags = flags;
local->mode = mode;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
local->hashed_subvol = subvol;
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s (link at %s)", loc->path,
avail_subvol->name, subvol->name);
- dht_linkfile_create (frame,
- dht_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
goto done;
}
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
done:
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
@@ -3799,26 +4025,28 @@ err:
int
dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
-
local = frame->local;
layout = local->selfheal.layout;
if (op_ret == 0) {
dht_layout_set (this, local->inode, layout);
if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
}
}
DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
local->inode, &local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
return 0;
}
@@ -3826,17 +4054,15 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
int
dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
int ret = -1;
- int subvol_filled = 0;
+ gf_boolean_t subvol_filled = _gf_false;
call_frame_t *prev = NULL;
dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- conf = this->private;
local = frame->local;
prev = cookie;
layout = local->layout;
@@ -3849,9 +4075,21 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ret = dht_layout_merge (this, layout, prev->this,
-1, ENOSPC, NULL);
} else {
+ if (op_ret == -1 && op_errno == EEXIST)
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
}
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
if (op_ret == -1) {
local->op_errno = op_errno;
@@ -3878,7 +4116,8 @@ int
dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int ret = -1;
@@ -3906,6 +4145,12 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
+ /* TODO: we may have to return from the function
+ if layout merge fails. For now, lets just log an error */
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
+
if (op_ret == -1) {
local->op_errno = op_errno;
goto err;
@@ -3918,6 +4163,8 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
local->call_cnt = conf->subvolume_cnt - 1;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, stbuf->ia_gfid);
if (local->call_cnt == 0) {
dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
&local->loc, layout);
@@ -3927,24 +4174,24 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
continue;
STACK_WIND (frame, dht_mkdir_cbk,
conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir,
- &local->loc, local->mode, local->params);
+ conf->subvolumes[i]->fops->mkdir, &local->loc,
+ local->mode, local->umask, local->params);
}
return 0;
err:
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
-int
+ int
dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int op_errno = -1;
- int ret = -1;
xlator_t *hashed_subvol = NULL;
@@ -3959,15 +4206,13 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);
if (!local) {
-
op_errno = ENOMEM;
goto err;
}
hashed_subvol = dht_subvol_get_hashed (this, loc);
-
if (hashed_subvol == NULL) {
gf_log (this->name, GF_LOG_DEBUG,
"hashed subvol not found for %s",
@@ -3977,21 +4222,13 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
}
local->hashed_subvol = hashed_subvol;
- local->inode = inode_ref (loc->inode);
- ret = loc_copy (&local->loc, loc);
local->mode = mode;
-
- if (ret == -1) {
-
- op_errno = ENOMEM;
- goto err;
- }
-
+ local->umask = umask;
local->params = dict_ref (params);
+ local->inode = inode_ref (loc->inode);
local->layout = dht_layout_new (this, conf->subvolume_cnt);
if (!local->layout) {
-
op_errno = ENOMEM;
goto err;
}
@@ -3999,13 +4236,14 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_mkdir_hashed_cbk,
hashed_subvol,
hashed_subvol->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -4013,14 +4251,87 @@ err:
int
dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
+
+ return 0;
+}
+
+
+int
+dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (op_errno != ENOENT && op_errno != EACCES) {
+ local->need_selfheal = 1;
+ }
+
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rmdir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->need_selfheal) {
+ local->layout =
+ dht_layout_get (this, local->loc.inode);
+
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+
+ uuid_copy (local->gfid, local->loc.inode->gfid);
+ dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+ &local->loc, local->layout);
+ } else {
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+ }
+
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
return 0;
}
@@ -4029,11 +4340,12 @@ dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
+ int done = 0;
local = frame->local;
prev = cookie;
@@ -4044,8 +4356,9 @@ dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
local->op_ret = -1;
- if (op_errno != ENOENT)
+ if (op_errno != ENOENT && op_errno != EACCES) {
local->need_selfheal = 1;
+ }
gf_log (this->name, GF_LOG_DEBUG,
"rmdir on %s for %s failed (%s)",
@@ -4054,6 +4367,8 @@ dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unlock;
}
+ /* Track if rmdir succeeded on atleast one subvol*/
+ local->fop_succeeded = 1;
dht_iatt_merge (this, &local->preparent, preparent, prev->this);
dht_iatt_merge (this, &local->postparent, postparent,
prev->this);
@@ -4063,8 +4378,17 @@ unlock:
this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
+
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
+
+
+ if (done) {
+ if (local->need_selfheal && local->fop_succeeded) {
local->layout =
dht_layout_get (this, local->loc.inode);
@@ -4074,15 +4398,34 @@ unlock:
uuid_copy (local->gfid, local->loc.inode->gfid);
dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
&local->loc, local->layout);
- } else {
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+
+ local->need_selfheal = 0;
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
+
if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+
}
DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
local->op_errno, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
}
}
@@ -4096,6 +4439,7 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this)
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int i = 0;
+ xlator_t *hashed_subvol = NULL;
VALIDATE_OR_GOTO (this->private, err);
@@ -4107,18 +4451,41 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this)
local->call_cnt = conf->subvolume_cnt;
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to get hashed "
+ "subvol for %s",local->loc.path);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
+
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ return 0;
+ }
+
for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (hashed_subvol &&
+ (hashed_subvol == conf->subvolumes[i]))
+ continue;
+
STACK_WIND (frame, dht_rmdir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->rmdir,
- &local->loc, local->flags);
+ &local->loc, local->flags, NULL);
}
return 0;
err:
DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -4126,7 +4493,7 @@ err:
int
dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -4174,6 +4541,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_frame_t *main_frame = NULL;
dht_local_t *main_local = NULL;
int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
local = frame->local;
prev = cookie;
@@ -4185,7 +4553,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != 0)
goto err;
- if (check_is_linkfile (inode, stbuf, xattr) == 0) {
+ if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
main_local->op_ret = -1;
main_local->op_errno = ENOTEMPTY;
@@ -4196,7 +4564,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
- src, src->fops->unlink, &local->loc);
+ src, src->fops->unlink, &local->loc, 0, NULL);
return 0;
err:
@@ -4219,6 +4587,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
call_frame_t *lookup_frame = NULL;
dht_local_t *lookup_local = NULL;
dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
local = frame->local;
@@ -4227,7 +4597,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
continue;
if (strcmp (trav->d_name, "..") == 0)
continue;
- if (check_is_linkfile (NULL, (&trav->d_stat), NULL) == 1) {
+ if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
ret++;
continue;
}
@@ -4239,6 +4610,21 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
return 0;
}
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ return -1;
+ }
+
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key"
+ " in dict");
+ if (xattrs)
+ dict_unref (xattrs);
+ return -1;
+ }
+
list_for_each_entry (trav, &entries->list, list) {
if (strcmp (trav->d_name, ".") == 0)
continue;
@@ -4255,8 +4641,7 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
goto err;
}
- lookup_local = GF_CALLOC (sizeof (*local), 1,
- gf_dht_mt_dht_local_t);
+ lookup_local = mem_get0 (this->local_pool);
if (!lookup_local) {
goto err;
}
@@ -4269,6 +4654,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
if (build_ret != 0)
goto err;
+ uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid);
+
gf_log (this->name, GF_LOG_TRACE,
"looking up %s on %s",
lookup_local->loc.path, src->name);
@@ -4281,12 +4668,18 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
src, src->fops->lookup,
- &lookup_local->loc, NULL);
+ &lookup_local->loc, xattrs);
ret++;
}
+ if (xattrs)
+ dict_unref (xattrs);
+
return ret;
err:
+ if (xattrs)
+ dict_unref (xattrs);
+
DHT_STACK_DESTROY (lookup_frame);
return 0;
}
@@ -4294,7 +4687,8 @@ err:
int
dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -4338,12 +4732,14 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
call_frame_t *prev = NULL;
-
+ dict_t *dict = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
local = frame->local;
prev = cookie;
@@ -4353,14 +4749,32 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"opendir on %s for %s failed (%s)",
prev->this->name, local->loc.path,
strerror (op_errno));
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ goto err;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
local->op_ret = -1;
- local->op_errno = op_errno;
+ local->op_errno = ENOMEM;
goto err;
}
+ ret = dict_set_uint32 (dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ local->loc.path, conf->link_xattr_name);
+
STACK_WIND (frame, dht_rmdir_readdirp_cbk,
prev->this, prev->this->fops->readdirp,
- local->fd, 4096, 0);
+ local->fd, 4096, 0, dict);
+
+ if (dict)
+ dict_unref (dict);
return 0;
@@ -4376,14 +4790,13 @@ err:
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int op_errno = -1;
int i = -1;
- int ret = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -4394,22 +4807,15 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);
if (!local) {
-
op_errno = ENOMEM;
goto err;
}
local->call_cnt = conf->subvolume_cnt;
local->op_ret = 0;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
-
- op_errno = ENOMEM;
- goto err;
- }
+ local->fop_succeeded = 0;
local->flags = flags;
@@ -4424,7 +4830,7 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
STACK_WIND (frame, dht_rmdir_opendir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- loc, local->fd);
+ loc, local->fd, NULL);
}
return 0;
@@ -4432,240 +4838,42 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
-
int
-dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-int
-dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame,
- dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict);
- return 0;
-}
-
+dht_entrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
-int
-dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL);
-
+ DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
int
-dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
+dht_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
if (!local) {
op_errno = ENOMEM;
-
goto err;
}
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame,
- dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (inodelk, frame, -1, op_errno);
-
- return 0;
-}
-
-
-int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
-
- STACK_WIND (frame,
- dht_finodelk_cbk,
- subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (finodelk, frame, -1, op_errno);
-
- return 0;
-}
-
-
-int
-dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for path=%s", loc->path);
@@ -4673,25 +4881,17 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
local->call_cnt = 1;
STACK_WIND (frame, dht_entrylk_cbk,
subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (entrylk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -4699,10 +4899,10 @@ err:
int
dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -4710,7 +4910,7 @@ dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
int
dht_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -4729,168 +4929,13 @@ dht_fentrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_fentrylk_cbk,
subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno);
-
- return 0;
-}
-
-
-int
-dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
- dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf);
-
- return 0;
-}
-
-
-int
-dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid);
- }
+ volume, fd, basename, cmd, type, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid)
-{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fsetattr,
- fd, stbuf, valid);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -4899,64 +4944,22 @@ err:
int
dht_forget (xlator_t *this, inode_t *inode)
{
- uint64_t tmp_layout = 0;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
dht_layout_t *layout = NULL;
- inode_ctx_del (inode, this, &tmp_layout);
+ inode_ctx_del (inode, this, &ctx_int);
- if (!tmp_layout)
+ if (!ctx_int)
return 0;
- layout = (dht_layout_t *)(long)tmp_layout;
- dht_layout_unref (this, layout);
-
- return 0;
-}
-
-
-
-int
-dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
-{
- xlator_list_t *subvols = NULL;
- int cnt = 0;
-
- if (!conf)
- return -1;
-
- for (subvols = this->children; subvols; subvols = subvols->next)
- cnt++;
-
- conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->subvolumes) {
-
- return -1;
- }
- conf->subvolume_cnt = cnt;
-
- cnt = 0;
- for (subvols = this->children; subvols; subvols = subvols->next)
- conf->subvolumes[cnt++] = subvols->xlator;
-
- conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
- gf_dht_mt_char);
- if (!conf->subvolume_status) {
-
- return -1;
- }
+ ctx = (dht_inode_ctx_t *) (long) ctx_int;
- conf->last_event = GF_CALLOC (cnt, sizeof (int),
- gf_dht_mt_char);
- if (!conf->last_event) {
+ layout = ctx->layout;
+ ctx->layout = NULL;
+ dht_layout_unref (this, layout);
+ GF_FREE (ctx);
- return -1;
- }
- conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t),
- gf_dht_mt_subvol_time);
- if (!conf->subvol_up_time) {
- return -1;
- }
return 0;
}
@@ -4964,16 +4967,22 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
int
dht_notify (xlator_t *this, int event, void *data, ...)
{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int propagate = 0;
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ struct timeval time = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- struct timeval time = {0,};
conf = this->private;
if (!conf)
@@ -5035,7 +5044,11 @@ dht_notify (xlator_t *this, int event, void *data, ...)
if (conf->assert_no_child_down) {
gf_log (this->name, GF_LOG_WARNING,
"Received CHILD_DOWN. Exiting");
- exit(0);
+ if (conf->defrag) {
+ gf_defrag_stop (conf->defrag, NULL);
+ } else {
+ kill (getpid(), SIGTERM);
+ }
}
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -5086,6 +5099,36 @@ dht_notify (xlator_t *this, int event, void *data, ...)
UNLOCK (&conf->subvolume_lock);
break;
+ case GF_EVENT_VOLUME_DEFRAG:
+ {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+ va_end (ap); /* every va_start needs a matching va_end */
+ ret = dict_get_int32 (dict, "rebalance-command",
+ (int32_t*)&cmd);
+ if (ret)
+ return ret;
+ LOCK (&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if (cmd == GF_DEFRAG_CMD_STATUS)
+ gf_defrag_status_get (defrag, output);
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ gf_defrag_stop (defrag, output);
+ }
+unlock:
+ UNLOCK (&defrag->lock);
+ return 0;
+ break;
+ }
+
default:
propagate = 1;
break;
@@ -5101,9 +5144,12 @@ dht_notify (xlator_t *this, int event, void *data, ...)
/* if all subvols have reported status, no need to hide anything
or wait for anything else. Just propagate blindly */
- if (have_heard_from_all)
+ if (have_heard_from_all) {
propagate = 1;
+ }
+
+
if (!had_heard_from_all && have_heard_from_all) {
/* This is the first event which completes aggregation
of events from all subvolumes. If at least one subvol
@@ -5122,6 +5168,19 @@ dht_notify (xlator_t *this, int event, void *data, ...)
/* continue to check other events for CHILD_UP */
}
}
+
+ /* rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ */
+ if (conf->defrag) {
+ ret = gf_thread_create (&conf->defrag->th, NULL,
+ gf_defrag_start, this);
+ if (ret) {
+ GF_FREE (conf->defrag); /* release before clearing, else it leaks */
+ conf->defrag = NULL;
+ kill (getpid(), SIGTERM);
+ }
+ }
}
ret = 0;
@@ -5130,3 +5189,24 @@ dht_notify (xlator_t *this, int event, void *data, ...)
return ret;
}
+
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 95d1b1d6a..5ccd66799 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -22,6 +13,8 @@
#include "config.h"
#endif
+#include <regex.h>
+
#include "dht-mem-types.h"
#include "libxlator.h"
#include "syncop.h"
@@ -29,45 +22,81 @@
#ifndef _DHT_H
#define _DHT_H
-#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout"
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
#define GF_DHT_LOOKUP_UNHASHED_ON 1
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
-#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
#include <fnmatch.h>
typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
+ int ret);
struct dht_layout {
- int spread_cnt; /* layout spread count per directory,
- is controlled by 'setxattr()' with
- special key */
- int cnt;
- int preset;
- int gen;
- int type;
- int ref; /* use with dht_conf_t->layout_lock */
- int search_unhashed;
+ int spread_cnt; /* layout spread count per directory,
+ is controlled by 'setxattr()' with
+ special key */
+ int cnt;
+ int preset;
+ int gen;
+ int type;
+ int ref; /* use with dht_conf_t->layout_lock */
+ int search_unhashed;
struct {
- int err; /* 0 = normal
- -1 = dir exists and no xattr
- >0 = dir lookup failed with errno
- */
- uint32_t start;
- uint32_t stop;
- xlator_t *xlator;
- } list[0];
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ xlator_t *xlator;
+ } list[];
+};
+typedef struct dht_layout dht_layout_t;
+
+struct dht_stat_time {
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
+};
+
+typedef struct dht_stat_time dht_stat_time_t;
+
+struct dht_inode_ctx {
+ dht_layout_t *layout;
+ dht_stat_time_t time;
};
-typedef struct dht_layout dht_layout_t;
+
+typedef struct dht_inode_ctx dht_inode_ctx_t;
typedef enum {
DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
} dht_hashfn_type_t;
+/* rebalance related */
+struct dht_rebalance_ {
+ xlator_t *from_subvol;
+ xlator_t *target_node;
+ off_t offset;
+ size_t size;
+ int32_t flags;
+ int count;
+ struct iobref *iobref;
+ struct iovec *vector;
+ struct iatt stbuf;
+ dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
+};
struct dht_local {
int call_cnt;
@@ -101,6 +130,7 @@ struct dht_local {
int file_count;
int dir_count;
call_frame_t *main_frame;
+ int fop_succeeded;
struct {
fop_mknod_cbk_t linkfile_cbk;
struct iatt stbuf;
@@ -112,7 +142,6 @@ struct dht_local {
struct {
uint32_t hole_cnt;
uint32_t overlaps_cnt;
- uint32_t missing;
uint32_t down;
uint32_t misc;
dht_selfheal_dir_cbk_t dir_cbk;
@@ -125,11 +154,16 @@ struct dht_local {
int32_t flags;
mode_t mode;
dev_t rdev;
+ mode_t umask;
/* need for file-info */
- char *pathinfo;
+ char *xattr_val;
char *key;
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
char *newpath;
/* gfid related */
@@ -140,22 +174,82 @@ struct dht_local {
/* flag used to make sure we need to return estale in
{lookup,revalidate}_cbk */
- char return_estale;
+ char return_estale;
+ char need_lookup_everywhere;
+
+ glusterfs_fop_t fop;
+
+ gf_boolean_t linked;
+ xlator_t *link_subvol;
+
+ struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
- /* rebalance related */
-#define to_subvol hashed_subvol
-#define from_subvol cached_subvol
};
typedef struct dht_local dht_local_t;
/* du - disk-usage */
struct dht_du {
double avail_percent;
+ double avail_inodes;
uint64_t avail_space;
uint32_t log;
};
typedef struct dht_du dht_du_t;
+enum gf_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+};
+typedef enum gf_defrag_type gf_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
+
+struct gf_defrag_pattern_list {
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ struct timeval start_time;
+ gf_boolean_t stats;
+ gf_defrag_pattern_list_t *defrag_pattern;
+};
+
+typedef struct gf_defrag_info_ gf_defrag_info_t;
+
struct dht_conf {
gf_lock_t subvolume_lock;
int subvolume_cnt;
@@ -167,7 +261,8 @@ struct dht_conf {
gf_boolean_t search_unhashed;
int gen;
dht_du_t *du_stats;
- uint64_t min_free_disk;
+ double min_free_disk;
+ double min_free_inodes;
char disk_unit;
int32_t refresh_interval;
gf_boolean_t unhashed_sticky_bit;
@@ -184,7 +279,28 @@ struct dht_conf {
/* Will be a global flag to control the layout spread count */
uint32_t dir_spread_cnt;
- struct syncenv *env; /* The env pointer to the rebalance synctask */
+ /* to keep track of nodes which are decommissioned */
+ xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Request to filter directory entries in readdir request */
+
+ gf_boolean_t readdir_optimize;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ gf_boolean_t rsync_regex_valid;
+ regex_t extra_regex;
+ gf_boolean_t extra_regex_valid;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *link_xattr_name;
+ char *wild_xattr_name;
};
typedef struct dht_conf dht_conf_t;
@@ -199,21 +315,45 @@ struct dht_disk_layout {
};
typedef struct dht_disk_layout dht_disk_layout_t;
-#define WIPE(statp) do { typeof(*statp) z = {0,}; if (statp) *statp = z; } while (0)
+typedef enum {
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+} gf_dht_migrate_data_type_t;
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_fs_root(loc) (strcmp (loc->path, "/") == 0)
-
-#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
-#define DHT_LINKFILE_MODE (S_ISVTX)
-#define check_is_linkfile(i,s,x) ( \
- ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) \
- == DHT_LINKFILE_MODE) && \
- (s->ia_size == 0))
+#define DHT_MIGRATION_IN_PROGRESS 1
+#define DHT_MIGRATION_COMPLETED 2
+
+#define DHT_LINKFILE_MODE (S_ISVTX)
+
+#define check_is_linkfile(i,s,x,n) ( \
+ ((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
+ == DHT_LINKFILE_MODE) && \
+ dict_get (x, n))
+
+#define IS_DHT_MIGRATION_PHASE2(buf) ( \
+ IA_ISREG ((buf)->ia_type) && \
+ ((st_mode_from_ia ((buf)->ia_prot, (buf)->ia_type) & \
+ ~S_IFMT) == DHT_LINKFILE_MODE))
+
+#define IS_DHT_MIGRATION_PHASE1(buf) ( \
+ IA_ISREG ((buf)->ia_type) && \
+ ((buf)->ia_prot.sticky == 1) && \
+ ((buf)->ia_prot.sgid == 1))
+
+#define DHT_STRIP_PHASE1_FLAGS(buf) do { \
+ if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
+ (buf)->ia_prot.sticky = 0; \
+ (buf)->ia_prot.sgid = 0; \
+ } \
+ } while (0)
#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
@@ -221,118 +361,427 @@ typedef struct dht_disk_layout dht_disk_layout_t;
#define DHT_STACK_UNWIND(fop, frame, params ...) do { \
dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
+ xlator_t *__xl = NULL; \
if (frame) { \
- __xl = frame->this; \
- __local = frame->local; \
+ __xl = frame->this; \
+ __local = frame->local; \
frame->local = NULL; \
} \
STACK_UNWIND_STRICT (fop, frame, params); \
dht_local_wipe (__xl, __local); \
} while (0)
-#define DHT_STACK_DESTROY(frame) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- dht_local_wipe (__xl, __local); \
+#define DHT_STACK_DESTROY(frame) do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ dht_local_wipe (__xl, __local); \
+ } while (0)
+
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) do {\
+ int32_t sec = 0; \
+ sec = new_sec; \
+ LOCK (&inode->lock); \
+ { \
+ new_sec = max(new_sec, ctx_sec); \
+ if (sec < new_sec) \
+ new_nsec = ctx_nsec; \
+ if (sec == new_sec) \
+ new_nsec = max (new_nsec, ctx_nsec); \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } \
+ UNLOCK (&inode->lock); \
} while (0)
-dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
-dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
-dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
+#define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
+dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
+dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
+dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
const char *name);
-int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
-int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
-int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
- xlator_t *subvol, loc_t *loc, dict_t *xattr);
+int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
+int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p,
+ uint32_t *misc_p, uint32_t *no_space_p);
+int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, loc_t *loc, dict_t *xattr);
xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
struct iatt *buf, dict_t *xattr);
-int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc);
+int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc);
int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr);
+ int op_ret, int op_errno, dict_t *xattr);
int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p);
-int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw);
+ int pos, int32_t **disk_layout_p);
+int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, void *disk_layout_raw, int disk_layout_len);
int dht_frame_return (call_frame_t *frame);
-int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
+int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
uint64_t *x);
void dht_local_wipe (xlator_t *this, dht_local_t *local);
-dht_local_t *dht_local_init (call_frame_t *frame);
-int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
- xlator_t *subvol);
+dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
+ glusterfs_fop_t fop);
+int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
+ xlator_t *subvol);
xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
-int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_subvol_next_available (xlator_t *this, xlator_t *prev);
+int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p);
-int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
-int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol,
+ xlator_t *fromvol, loc_t *loc);
+int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
dht_layout_t *layout);
int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_layout_sort_volname (dht_layout_t *layout);
-int dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
-
int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
-int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
+gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *local);
+int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
-int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);
-void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
+int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);
+void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
-xlator_t *dht_first_up_subvol (xlator_t *this);
-xlator_t *dht_last_up_subvol (xlator_t *this);
+xlator_t *dht_first_up_subvol (xlator_t *this);
+xlator_t *dht_last_up_subvol (xlator_t *this);
int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
-int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
+int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
xlator_t **subvol);
-int dht_rename_cleanup (call_frame_t *frame);
+int dht_rename_cleanup (call_frame_t *frame);
int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent);
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
int dht_fix_directory_layout (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout);
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout);
+int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);
+
+/* migration/rebalance */
int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
-#endif /* _DHT_H */
+
+int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
+int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+
+
+/* FOPS */
+int32_t dht_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req);
+
+int32_t dht_stat (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t dht_fstat (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t dht_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+int32_t dht_ftruncate (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int32_t dht_access (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+int32_t dht_readlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ size_t size, dict_t *xdata);
+
+int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
+
+int32_t dht_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
+
+int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata);
+
+int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+int32_t dht_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t dht_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t dht_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params);
+
+int32_t dht_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+int32_t dht_readv (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t dht_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int32_t dht_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t dht_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t dht_opendir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata);
+
+int32_t dht_fsyncdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t dht_statfs (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t dht_setxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t dht_getxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t dht_fsetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t dht_fgetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t dht_removexattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t dht_fremovexattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t dht_lk (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t dht_readdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t dht_readdirp (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *dict);
+
+int32_t dht_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t dht_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t dht_forget (xlator_t *this, inode_t *inode);
+int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+int32_t dht_init (xlator_t *this);
+void dht_fini (xlator_t *this);
+int dht_reconfigure (xlator_t *this, dict_t *options);
+int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
+
+/* definitions for nufa/switch */
+int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
+
+void*
+gf_defrag_start (void *this);
+
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf);
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag);
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t* layout_int);
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
+
+int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
+int
+dht_dir_attr_heal (void *data);
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data);
+int
+dht_dir_has_layout (dict_t *xattr, char *name);
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this);
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix);
+int32_t
+dht_priv_dump (xlator_t *this);
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol);
+
+#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 9e2327bff..fe3955ecb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -35,224 +26,389 @@
int
dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs,
+ dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
- int i = 0;
- double percent = 0;
- uint64_t bytes = 0;
-
- conf = this->private;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get disk info from %s", prev->this->name);
- goto out;
- }
-
- if (statvfs && statvfs->f_blocks) {
- percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
- bytes = (statvfs->f_bavail * statvfs->f_frsize);
- }
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++)
- if (prev->this == conf->subvolumes[i]) {
- conf->du_stats[i].avail_percent = percent;
- conf->du_stats[i].avail_space = bytes;
- gf_log (this->name, GF_LOG_DEBUG,
- "on subvolume '%s': avail_percent is: "
- "%.2f and avail_space is: %"PRIu64"",
- prev->this->name,
- conf->du_stats[i].avail_percent,
- conf->du_stats[i].avail_space);
- }
- }
- UNLOCK (&conf->subvolume_lock);
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ int i = 0;
+ double percent = 0;
+ double percent_inodes = 0;
+ uint64_t bytes = 0;
+
+ conf = this->private;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get disk info from %s", prev->this->name);
+ goto out;
+ }
+
+ if (statvfs && statvfs->f_blocks) {
+ percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
+ bytes = (statvfs->f_bavail * statvfs->f_frsize);
+ }
+
+ if (statvfs && statvfs->f_files) {
+ percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
+ } else {
+ /* Set percent_inodes to 100 for filesystems that allocate inodes
+ dynamically. That way distribute does not have to worry about the
+ total inode count, and 'create()' is scheduled on the hashed subvol
+ regardless of the total inodes. Since we have no way of knowing
+ when such a filesystem is losing inodes, this logic fits well.
+ */
+ percent_inodes = 100;
+ }
+
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (prev->this == conf->subvolumes[i]) {
+ conf->du_stats[i].avail_percent = percent;
+ conf->du_stats[i].avail_space = bytes;
+ conf->du_stats[i].avail_inodes = percent_inodes;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "on subvolume '%s': avail_percent is: "
+ "%.2f and avail_space is: %"PRIu64" "
+ "and avail_inodes is: %.2f",
+ prev->this->name,
+ conf->du_stats[i].avail_percent,
+ conf->du_stats[i].avail_space,
+ conf->du_stats[i].avail_inodes);
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_DESTROY (frame);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
{
- dht_conf_t *conf = NULL;
- call_frame_t *statfs_frame = NULL;
- dht_local_t *statfs_local = NULL;
- call_pool_t *pool = NULL;
-
- conf = this->private;
- pool = this->ctx->pool;
-
- statfs_frame = create_frame (this, pool);
- if (!statfs_frame) {
- goto err;
- }
-
- statfs_local = dht_local_init (statfs_frame);
- if (!statfs_local) {
- goto err;
- }
-
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
- statfs_local->call_cnt = 1;
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[subvol_idx],
- conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc);
-
- return 0;
+ dht_conf_t *conf = NULL;
+ call_frame_t *statfs_frame = NULL;
+ dht_local_t *statfs_local = NULL;
+ call_pool_t *pool = NULL;
+ loc_t tmp_loc = {0,};
+
+ conf = this->private;
+ pool = this->ctx->pool;
+
+ statfs_frame = create_frame (this, pool);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* local->fop value is not used in this case */
+ statfs_local = dht_local_init (statfs_frame, NULL, NULL,
+ GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
+
+ statfs_local->call_cnt = 1;
+ STACK_WIND (statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx]->fops->statfs,
+ &tmp_loc, NULL);
+
+ return 0;
err:
- if (statfs_frame)
- DHT_STACK_DESTROY (statfs_frame);
+ if (statfs_frame)
+ DHT_STACK_DESTROY (statfs_frame);
- return -1;
+ return -1;
}
int
dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
- dht_conf_t *conf = NULL;
- call_frame_t *statfs_frame = NULL;
- dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ call_frame_t *statfs_frame = NULL;
+ dht_local_t *statfs_local = NULL;
+ struct timeval tv = {0,};
+ loc_t tmp_loc = {0,};
+
+ conf = this->private;
+
+ gettimeofday (&tv, NULL);
+
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
+
+ if (tv.tv_sec > (conf->refresh_interval
+ + conf->last_stat_fetch.tv_sec)) {
+
+ statfs_frame = copy_frame (frame);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* In this case, 'local->fop' is not used */
+ statfs_local = dht_local_init (statfs_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ statfs_local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs,
+ &tmp_loc, NULL);
+ }
+
+ conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ }
+ return 0;
+err:
+ if (statfs_frame)
+ DHT_STACK_DESTROY (statfs_frame);
- conf = this->private;
+ return -1;
+}
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (conf->refresh_interval
- + conf->last_stat_fetch.tv_sec)) {
- statfs_frame = copy_frame (frame);
- if (!statfs_frame) {
- goto err;
- }
+gf_boolean_t
+dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ gf_boolean_t subvol_filled_inodes = _gf_false;
+ gf_boolean_t subvol_filled_space = _gf_false;
+ gf_boolean_t is_subvol_filled = _gf_false;
+
+ conf = this->private;
+
+ /* Check for values above specified percent or free disk */
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ if (conf->disk_unit == 'p') {
+ if (conf->du_stats[i].avail_percent <
+ conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+
+ } else {
+ if (conf->du_stats[i].avail_space <
+ conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+ }
+ if (conf->du_stats[i].avail_inodes <
+ conf->min_free_inodes) {
+ subvol_filled_inodes = _gf_true;
+ break;
+ }
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ if (subvol_filled_space && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "disk space on subvolume '%s' is getting "
+ "full (%.2f %%), consider adding more nodes",
+ subvol->name,
+ (100 - conf->du_stats[i].avail_percent));
+ }
+ }
+
+ if (subvol_filled_inodes && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "inodes on subvolume '%s' are at "
+ "(%.2f %%), consider adding more nodes",
+ subvol->name,
+ (100 - conf->du_stats[i].avail_inodes));
+ }
+ }
+
+ is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
+
+ return is_subvol_filled;
+}
+
+
+/*Get the best subvolume to create the file in*/
+xlator_t *
+dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
+{
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
- statfs_local = dht_local_init (statfs_frame);
- if (!statfs_local) {
- goto err;
+ conf = this->private;
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get (this, loc->parent);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout missing path=%s parent=%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
+ goto out;
}
+ } else {
+ layout = dht_layout_ref (this, local->layout);
+ }
- loc_copy (&statfs_local->loc, loc);
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
- statfs_local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs,
- &tmp_loc);
+ LOCK (&conf->subvolume_lock);
+ {
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ layout);
+ if(!avail_subvol)
+ {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
+ subvol,
+ layout);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
- return 0;
-err:
- if (statfs_frame)
- DHT_STACK_DESTROY (statfs_frame);
+ }
+ UNLOCK (&conf->subvolume_lock);
+out:
+ if (!avail_subvol) {
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "no subvolume has enough free space and/or inodes\
+ to create");
+ avail_subvol = subvol;
+ }
- return -1;
+ if (layout)
+ dht_layout_unref (this, layout);
+ return avail_subvol;
}
+static inline
+int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout)
+{
+ int ret = -1;
+ int i = 0;
-int
-dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
+ if (!this || !layout)
+ goto out;
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
{
- int i = 0;
- int subvol_filled = 0;
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
- /* Check for values above specified percent or free disk */
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent <
- conf->min_free_disk) {
- subvol_filled = 1;
- break;
- }
- } else {
- if (conf->du_stats[i].avail_space <
- conf->min_free_disk) {
- subvol_filled = 1;
- break;
- }
- }
+ for(i=0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
}
}
- }
- UNLOCK (&conf->subvolume_lock);
-
- if (subvol_filled && conf->subvolume_status[i]) {
- if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
- gf_log (this->name, GF_LOG_WARNING,
- "disk space on subvolume '%s' is getting "
- "full (%.2f %%), consider adding more nodes",
- subvol->name,
- (100 - conf->du_stats[i].avail_percent));
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
}
}
- return subvol_filled;
+ return avail_subvol;
}
+
+/* Get subvol which has at least one inode and maximum space */
xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
{
int i = 0;
- double max= 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
xlator_t *avail_subvol = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent > max) {
- max = conf->du_stats[i].avail_percent;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if (conf->du_stats[i].avail_space > max) {
- max = conf->du_stats[i].avail_space;
- avail_subvol = conf->subvolumes[i];
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max)
+ && (conf->du_stats[i].avail_inodes > 0 )) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
}
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- if (!avail_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space to create");
+ } else {
+ if ((conf->du_stats[i].avail_space > max)
+ && (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
}
- if (max < conf->min_free_disk)
- avail_subvol = subvol;
-
- if (!avail_subvol)
- avail_subvol = subvol;
-
return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index c8ae74172..656cf23a0 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -37,6 +28,7 @@ dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
switch (type) {
case DHT_HASH_TYPE_DM:
+ case DHT_HASH_TYPE_DM_USER:
hash = gf_dm_hashfn (name, strlen (name));
break;
default:
@@ -52,30 +44,68 @@ dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
}
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
- rsync_frndly_name = (char *) name; \
- if (name[0] == '.') { \
- char *dot = 0; \
- int namelen = 0; \
- \
- dot = strrchr (name, '.'); \
- if (dot && dot > (name + 1) && *(dot + 1)) { \
- namelen = (dot - name); \
- rsync_frndly_name = alloca (namelen); \
- strncpy (rsync_frndly_name, name + 1, \
- namelen); \
- rsync_frndly_name[namelen - 1] = 0; \
- } \
- } \
- } while (0);
+static inline
+gf_boolean_t
+dht_munge_name (const char *original, char *modified, size_t len, regex_t *re)
+{
+ regmatch_t matches[2];
+ size_t new_len;
+
+ if (regexec(re,original,2,matches,0) != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy (modified,original+matches[1].rm_so,
+ new_len);
+ modified[new_len] = '\0';
+ return _gf_true;
+ }
+ }
+ }
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified,original);
+ return _gf_false;
+}
int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = this->private;
+ size_t len = 0;
+ gf_boolean_t munged = _gf_false;
+
+ /*
+ * It wouldn't be safe to use alloca in an inline function that doesn't
+ * actually get inlined, and it wouldn't be efficient to do a real
+ * allocation, so we use alloca here (if needed) and pass that to the
+ * inline.
+ */
+
+ if (priv->extra_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->extra_regex);
+ }
+
+ if (!munged && priv->rsync_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ gf_log (this->name, GF_LOG_TRACE, "trying regex for %s", name);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->rsync_regex);
+ if (munged) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "munged down to %s", rsync_friendly_name);
+ }
+ }
- MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
+ if (!munged) {
+ rsync_friendly_name = (char *)name;
+ }
return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 44ed9c682..311a48112 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,6 +18,28 @@
#include "xlator.h"
#include "dht-common.h"
+static inline int
+dht_inode_ctx_set1 (xlator_t *this, inode_t *inode, xlator_t *subvol)
+{
+ uint64_t tmp_subvol = 0;
+
+ tmp_subvol = (long)subvol;
+ return inode_ctx_set1 (inode, this, &tmp_subvol);
+}
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol)
+{
+ int ret = -1;
+ uint64_t tmp_subvol = 0;
+
+ ret = inode_ctx_get1 (inode, this, &tmp_subvol);
+ if (tmp_subvol && subvol)
+ *subvol = (xlator_t *)tmp_subvol;
+
+ return ret;
+}
+
int
dht_frame_return (call_frame_t *frame)
@@ -49,6 +62,43 @@ dht_frame_return (call_frame_t *frame)
}
+static uint64_t
+dht_bits_for (uint64_t num)
+{
+ uint64_t bits = 0, ctrl = 1;
+
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits ++;
+ }
+
+ return bits;
+}
+
+/*
+ * A slightly "updated" version of the algorithm described in the commit log
+ * is used here.
+ *
+ * The only enhancement is that:
+ *
+ * - The number of bits used by the backend filesystem for HUGE d_off which
+ * is described as 63, and
+ * - The number of bits used by the d_off presented by the transformation
+ * upwards which is described as 64, are both made "configurable."
+ */
+
+
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
+
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
+
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
+
int
dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
{
@@ -56,6 +106,9 @@ dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
int cnt = 0;
int max = 0;
uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
if (x == ((uint64_t) -1)) {
y = (uint64_t) -1;
@@ -69,7 +122,23 @@ dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
max = conf->subvolume_cnt;
cnt = dht_subvol_cnt (this, subvol);
- y = ((x * max) + cnt);
+ if (max == 1) {
+ y = x;
+ goto out;
+ }
+
+ max_bits = dht_bits_for (max);
+
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
+
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
+ } else {
+ /* small d_off */
+ y = ((x * max) + cnt);
+ }
out:
if (y_p)
@@ -89,7 +158,7 @@ dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
int ret = 0; /* not found */
/* Why do other tasks if first required 'char' itself is not there */
- if (loc->name && !strchr (loc->name, '@'))
+ if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@'))
goto out;
trav = this->children;
@@ -117,7 +186,6 @@ dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
new_loc->path = ((new_path) ? new_path:
gf_strdup (loc->path));
new_loc->name = new_name;
- new_loc->ino = loc->ino;
new_loc->inode = inode_ref (loc->inode);
new_loc->parent = inode_ref (loc->parent);
}
@@ -130,10 +198,8 @@ dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
out:
if (!ret) {
/* !success */
- if (new_path)
- GF_FREE (new_path);
- if (new_name)
- GF_FREE (new_name);
+ GF_FREE (new_path);
+ GF_FREE (new_name);
}
return ret;
}
@@ -147,16 +213,38 @@ dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
int max = 0;
uint64_t x = 0;
xlator_t *subvol = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
if (!this->private)
- goto out;
+ return -1;
conf = this->private;
max = conf->subvolume_cnt;
- cnt = y % max;
- x = y / max;
+ if (max == 1) {
+ x = y;
+ cnt = 0;
+ goto out;
+ }
+
+ if (y & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = dht_bits_for (max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
+
+ x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
+
+ cnt = y & host_mask;
+ } else {
+ /* small d_off */
+ cnt = y % max;
+ x = y / max;
+ }
+out:
subvol = conf->subvolumes[cnt];
if (subvol_p)
@@ -165,7 +253,6 @@ dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
if (x_p)
*x_p = x;
-out:
return 0;
}
@@ -216,52 +303,65 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
local->selfheal.layout = NULL;
}
- if (local->newpath) {
- GF_FREE (local->newpath);
- }
+ GF_FREE (local->newpath);
- if (local->key) {
- GF_FREE (local->key);
- }
+ GF_FREE (local->key);
- GF_FREE (local);
+ GF_FREE (local->rebalance.vector);
+
+ if (local->rebalance.iobref)
+ iobref_unref (local->rebalance.iobref);
+
+ mem_put (local);
}
dht_local_t *
-dht_local_init (call_frame_t *frame)
+dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
{
dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
- /* TODO: use mem-pool */
- local = GF_CALLOC (1, sizeof (*local),
- gf_dht_mt_dht_local_t);
-
+ local = mem_get0 (THIS->local_pool);
if (!local)
- return NULL;
+ goto out;
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
+ if (loc) {
+ ret = loc_copy (&local->loc, loc);
+ if (ret)
+ goto out;
- frame->local = local;
+ inode = loc->inode;
+ }
- return local;
-}
+ if (fd) {
+ local->fd = fd_ref (fd);
+ if (!inode)
+ inode = fd->inode;
+ }
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop = fop;
-char *
-basestr (const char *str)
-{
- char *basestr = NULL;
+ if (inode) {
+ local->layout = dht_layout_get (frame->this, inode);
+ local->cached_subvol = dht_subvol_get_cached (frame->this,
+ inode);
+ }
- basestr = strrchr (str, '/');
- if (basestr)
- basestr ++;
+ frame->local = local;
- return basestr;
+out:
+ if (ret) {
+ if (local)
+ mem_put (local);
+ local = NULL;
+ }
+ return local;
}
-
xlator_t *
dht_first_up_subvol (xlator_t *this)
{
@@ -326,17 +426,23 @@ dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
- if (is_fs_root (loc)) {
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+ if (__is_root_gfid (loc->gfid)) {
subvol = dht_first_up_subvol (this);
goto out;
}
+ GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+
layout = dht_layout_get (this, loc->parent);
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
- "layout missing path=%s parent=%"PRId64,
- loc->path, loc->parent->ino);
+ "layout missing path=%s parent=%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
goto out;
}
@@ -364,6 +470,8 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
layout = dht_layout_get (this, inode);
@@ -405,7 +513,36 @@ out:
return next;
}
+/* Return the subvolume that follows 'prev' in conf->subvolumes, wrapping
+ * around to the first entry when 'prev' is the last one. Returns NULL when
+ * this->private is not set or 'prev' is not found in the list.
+ */
+xlator_t *
+dht_subvol_next_available (xlator_t *this, xlator_t *prev)
+{
+        dht_conf_t *conf = NULL;
+        xlator_t   *found = NULL;
+        int         idx = 0;
conf = this->private;
+        if (conf == NULL)
+                return NULL;
+
+        while (idx < conf->subvolume_cnt) {
+                if (conf->subvolumes[idx] == prev) {
+                        /* successor index; wraps to 0 past the last slot */
+                        idx = (idx + 1) % conf->subvolume_cnt;
+                        found = conf->subvolumes[idx];
+                        break;
+                }
+                idx++;
+        }
+
+        return found;
+}
int
dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
{
@@ -434,6 +571,15 @@ out:
(a) = (b); \
} while (0)
+
+/* Copy the (seconds, nanoseconds) pair (b, bn) into (a, an) when it is the
+ * later of the two: seconds are compared first, nanoseconds break ties.
+ * Every argument is expanded more than once, so pass plain lvalues or
+ * values without side effects.
+ */
+#define set_if_greater_time(a, an, b, bn) do {                          \
+        if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) {           \
+                (a) = (b);                                              \
+                (an) = (bn);                                            \
+        }                                                               \
+        } while (0)
+
+
int
dht_iatt_merge (xlator_t *this, struct iatt *to,
struct iatt *from, xlator_t *subvol)
@@ -457,9 +603,12 @@ dht_iatt_merge (xlator_t *this, struct iatt *to,
set_if_greater (to->ia_uid, from->ia_uid);
set_if_greater (to->ia_gid, from->ia_gid);
- set_if_greater (to->ia_atime, from->ia_atime);
- set_if_greater (to->ia_mtime, from->ia_mtime);
- set_if_greater (to->ia_ctime, from->ia_ctime);
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
+ from->ia_atime, from->ia_atime_nsec);
+ set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
+ from->ia_mtime, from->ia_mtime_nsec);
+ set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
+ from->ia_ctime, from->ia_ctime_nsec);
return 0;
}
@@ -496,3 +645,503 @@ err:
loc_wipe (child);
return -1;
}
+
+
+
+/*
+ * Fill in the per-subvolume arrays of 'conf' from this->children.
+ *
+ * Counts the children, stores them in conf->subvolumes (and the count in
+ * conf->subvolume_cnt), then allocates one zeroed slot per child for
+ * subvolume_status, last_event, subvol_up_time, du_stats and
+ * decommissioned_bricks.
+ *
+ * Returns 0 on success and -1 when conf is NULL or an allocation fails.
+ * Arrays allocated before a failure stay attached to conf; nothing is
+ * freed here.
+ */
+int
+dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+{
+        xlator_list_t *child = NULL;
+        int            total = 0;
+        int            idx = 0;
+
+        if (conf == NULL)
+                goto fail;
+
+        child = this->children;
+        while (child != NULL) {
+                total++;
+                child = child->next;
+        }
+
+        conf->subvolumes = GF_CALLOC (total, sizeof (xlator_t *),
+                                      gf_dht_mt_xlator_t);
+        if (conf->subvolumes == NULL)
+                goto fail;
+
+        conf->subvolume_cnt = total;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                conf->subvolumes[idx] = child->xlator;
+                idx++;
+        }
+
+        conf->subvolume_status = GF_CALLOC (total, sizeof (char),
+                                            gf_dht_mt_char);
+        if (conf->subvolume_status == NULL)
+                goto fail;
+
+        conf->last_event = GF_CALLOC (total, sizeof (int),
+                                      gf_dht_mt_char);
+        if (conf->last_event == NULL)
+                goto fail;
+
+        conf->subvol_up_time = GF_CALLOC (total, sizeof (time_t),
+                                          gf_dht_mt_subvol_time);
+        if (conf->subvol_up_time == NULL)
+                goto fail;
+
+        conf->du_stats = GF_CALLOC (total, sizeof (dht_du_t),
+                                    gf_dht_mt_dht_du_t);
+        if (conf->du_stats == NULL)
+                goto fail;
+
+        conf->decommissioned_bricks = GF_CALLOC (total, sizeof (xlator_t *),
+                                                 gf_dht_mt_xlator_t);
+        if (conf->decommissioned_bricks == NULL)
+                goto fail;
+
+        return 0;
+
+fail:
+        return -1;
+}
+
+
+
+
+/*
+ * Completion callback registered with synctask_new() by
+ * dht_rebalance_complete_check(). Hands op_ret (presumably the task's
+ * return value) to the resume function stored in
+ * local->rebalance.target_op_fn. 'data' is not used.
+ */
+static int
+dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data)
+{
+        dht_local_t *state = frame->local;
+
+        state->rebalance.target_op_fn (THIS, frame, op_ret);
+
+        return 0;
+}
+
+
+/*
+ * Synctask body that checks whether the file described by the frame's
+ * local has finished migrating. 'data' is the call frame.
+ *
+ * Steps, in order: find the destination subvolume (from the linkto xattr
+ * read on local->cached_subvol, or through a fresh lookup when that read
+ * fails with ENOENT on a path-based request), verify the gfid on the
+ * destination, install a layout for the destination on the inode, point
+ * local->cached_subvol at it, and open every non-anonymous fd of the
+ * inode on the destination.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int
+dht_migration_complete_check_task (void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ dht_layout_t *layout = NULL;
+ struct iatt stbuf = {0,};
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ loc_t tmp_loc = {0,};
+ char *path = NULL;
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ uint64_t tmp_subvol = 0;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ /* nothing to work on without either an inode or an fd */
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+
+ if (!local->loc.inode) {
+ ret = syncop_fgetxattr (src_node, local->fd, &dict,
+ conf->link_xattr_name);
+ } else {
+ SYNCTASK_SETID (0, 0);
+ ret = syncop_getxattr (src_node, &local->loc, &dict,
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ }
+ /* NOTE(review): 'dict' is not released anywhere in this function;
+ * confirm whether ownership is handled elsewhere. */
+
+ if (!ret)
+ dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+
+ if (ret) {
+ if ((errno != ENOENT) || (!local->loc.inode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr %s",
+ local->loc.path, strerror (errno));
+ goto out;
+ }
+ /* Need to do lookup on hashed subvol, then get the file */
+ ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL,
+ NULL);
+ if (ret)
+ goto out;
+ dst_node = dht_subvol_get_cached (this, local->loc.inode);
+ }
+
+ if (!dst_node) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the destination node",
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ /* lookup on dst */
+ if (local->loc.inode) {
+ ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to lookup the file on %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: gfid different on the target file on %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* update inode ctx (the layout) */
+ dht_layout_unref (this, local->layout);
+
+ ret = dht_layout_preset (this, dst_node, inode);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: could not set preset layout for subvol %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+
+ layout = dht_layout_for_subvol (this, dst_node);
+ if (!layout) {
+ /* dst_node was already checked for NULL above, so the
+ * "<nil>" fallback below is not reachable */
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no pre-set layout for subvolume %s",
+ local->loc.path, dst_node ? dst_node->name : "<nil>");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dht_layout_set (this, inode, layout);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set the new layout",
+ local->loc.path);
+ goto out;
+ }
+
+ local->cached_subvol = dst_node;
+ ret = 0;
+
+ /* once we detect the migration complete, the inode-ctx2 is no more
+ required.. delete the ctx and also, it means, open() already
+ done on all the fd of inode */
+ /* NOTE(review): 'ret' now holds inode_ctx_reset1()'s result, and the
+ * two 'goto out' exits below return it instead of the 0 set above */
+ ret = inode_ctx_reset1 (inode, this, &tmp_subvol);
+ if (tmp_subvol)
+ goto out;
+
+ if (list_empty (&inode->fd_list))
+ goto out;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* perform 'open()' on all the fd's present on the inode */
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+
+ ret = syncop_open (dst_node, &tmp_loc,
+ iter_fd->flags, iter_fd);
+ if (ret == -1) {
+ /* remember the failure but keep trying the other fds */
+ gf_log (this->name, GF_LOG_ERROR, "failed to open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ }
+ }
+ GF_FREE (path);
+
+ /* restore the caller's identity after the root:root opens */
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+
+ return ret;
+}
+
+/*
+ * Queue dht_migration_complete_check_task on this->ctx->env with
+ * dht_migration_complete_check_done as its completion callback; 'frame'
+ * is passed as both frame and task argument. Returns synctask_new()'s
+ * result.
+ */
+int
+dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
+{
+        return synctask_new (this->ctx->env,
+                             dht_migration_complete_check_task,
+                             dht_migration_complete_check_done,
+                             frame, frame);
+}
+
+/*
+ * During 'in-progress' state, both nodes should have the file.
+ *
+ * Completion callback registered with synctask_new() by
+ * dht_rebalance_in_progress_check(): forwards op_ret to the resume
+ * function stored in local->rebalance.target_op_fn. 'data' is not used.
+ */
+static int
+dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data)
+{
+        dht_local_t *state = sync_frame->local;
+
+        state->rebalance.target_op_fn (THIS, sync_frame, op_ret);
+
+        return 0;
+}
+
+/*
+ * Synctask body for a file whose migration is still in progress. 'data'
+ * is the call frame.
+ *
+ * Reads the linkto xattr on local->cached_subvol to find the destination
+ * subvolume, records it in local->rebalance.target_node, verifies the
+ * gfid on the destination (path-based requests only), opens every
+ * non-anonymous fd of the inode there, and finally stores the
+ * destination in the inode ctx with dht_inode_ctx_set1().
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+static int
+dht_rebalance_inprogress_task (void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *path = NULL;
+ struct iatt stbuf = {0,};
+ loc_t tmp_loc = {0,};
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ /* nothing to work on without either an inode or an fd */
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+ if (local->loc.inode) {
+ SYNCTASK_SETID (0, 0);
+ ret = syncop_getxattr (src_node, &local->loc, &dict,
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ } else {
+ ret = syncop_fgetxattr (src_node, local->fd, &dict,
+ conf->link_xattr_name);
+ }
+ /* NOTE(review): 'dict' is not released anywhere in this function;
+ * confirm whether ownership is handled elsewhere. */
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr %s",
+ local->loc.path, strerror (errno));
+ goto out;
+ }
+
+ dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+ if (!dst_node) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr from dict",
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ local->rebalance.target_node = dst_node;
+
+ if (local->loc.inode) {
+ /* lookup on dst */
+ ret = syncop_lookup (dst_node, &local->loc, NULL,
+ &stbuf, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to lookup the file on %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: gfid different on the target file on %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+ /* no open fds: skip the re-open loop and just record the target */
+ if (list_empty (&inode->fd_list))
+ goto done;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID (0, 0);
+
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+
+ ret = syncop_open (dst_node, &tmp_loc,
+ iter_fd->flags, iter_fd);
+ if (ret == -1) {
+ /* remember the failure but keep trying the other fds */
+ gf_log (this->name, GF_LOG_ERROR, "failed to send open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ }
+ }
+ GF_FREE (path);
+
+ /* restore the caller's identity after the root:root opens */
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+
+done:
+ ret = dht_inode_ctx_set1 (this, inode, dst_node);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set inode-ctx target file at %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Queue dht_rebalance_inprogress_task on this->ctx->env with
+ * dht_inprogress_check_done as its completion callback; 'frame' is
+ * passed as both frame and task argument. Returns synctask_new()'s
+ * result.
+ */
+int
+dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
+{
+        return synctask_new (this->ctx->env, dht_rebalance_inprogress_task,
+                             dht_inprogress_check_done,
+                             frame, frame);
+}
+
+/*
+ * Store 'layout_int' in the dht inode ctx of 'inode'.
+ *
+ * Reuses the ctx returned by dht_inode_ctx_get() when there is one,
+ * otherwise allocates a zeroed ctx, then writes the ctx back with
+ * dht_inode_ctx_set().
+ *
+ * Returns dht_inode_ctx_set()'s result, or -1 when a new ctx cannot be
+ * allocated. A ctx allocated here is freed again if it could not be
+ * stored, so it does not leak.
+ */
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+                          dht_layout_t *layout_int)
+{
+        dht_inode_ctx_t *ctx = NULL;
+        int              ret = -1;
+        int              allocated = 0;
+
+        ret = dht_inode_ctx_get (inode, this, &ctx);
+        if (ret || !ctx) {
+                ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+                if (!ctx)
+                        return -1;
+                allocated = 1;
+        }
+
+        ctx->layout = layout_int;
+
+        ret = dht_inode_ctx_set (inode, this, ctx);
+        if (ret && allocated) {
+                /* the inode never took ownership of the new ctx */
+                GF_FREE (ctx);
+        }
+
+        return ret;
+}
+
+/*
+ * Feed the mtime/ctime/atime (seconds and nanoseconds) of 'stat' through
+ * DHT_UPDATE_TIME into the time record kept in the dht inode ctx of
+ * 'inode', creating the ctx when the inode does not have one yet.
+ * 'post' is handed to DHT_UPDATE_TIME unchanged.
+ *
+ * Returns -1 when a new ctx cannot be allocated and 0 otherwise; a failed
+ * argument check or a failure to store the ctx is not reported to the
+ * caller. A ctx allocated here is freed again if it could not be stored.
+ */
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+                           int32_t post)
+{
+        dht_inode_ctx_t *ctx = NULL;
+        dht_stat_time_t *time = NULL;
+        int              ret = -1;
+        int              allocated = 0;
+
+        GF_VALIDATE_OR_GOTO (this->name, stat, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        ret = dht_inode_ctx_get (inode, this, &ctx);
+
+        /* also allocate when the lookup succeeded but produced no ctx */
+        if (ret || !ctx) {
+                ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+                if (!ctx)
+                        return -1;
+                allocated = 1;
+        }
+
+        time = &ctx->time;
+
+        DHT_UPDATE_TIME(time->mtime, time->mtime_nsec,
+                        stat->ia_mtime, stat->ia_mtime_nsec, inode, post);
+        DHT_UPDATE_TIME(time->ctime, time->ctime_nsec,
+                        stat->ia_ctime, stat->ia_ctime_nsec, inode, post);
+        DHT_UPDATE_TIME(time->atime, time->atime_nsec,
+                        stat->ia_atime, stat->ia_atime_nsec, inode, post);
+
+        ret = dht_inode_ctx_set (inode, this, ctx);
+        if (ret && allocated) {
+                /* the inode never took ownership of the new ctx */
+                GF_FREE (ctx);
+        }
+out:
+        return 0;
+}
+
+/*
+ * Fetch the dht inode ctx stored on 'inode' for 'this'.
+ *
+ * On success (return 0) the stored value is written to *ctx when 'ctx'
+ * is non-NULL. Returns -1 when an argument check fails, otherwise
+ * inode_ctx_get()'s result.
+ */
+int
+dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+{
+        uint64_t raw = 0;
+        int      ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        ret = inode_ctx_get (inode, this, &raw);
+        if ((ret == 0) && (ctx != NULL))
+                *ctx = (dht_inode_ctx_t *) raw;
+out:
+        return ret;
+}
+
+/*
+ * Store 'ctx' as the dht inode ctx of 'inode' for 'this'.
+ *
+ * Returns -1 when an argument check fails, otherwise inode_ctx_set()'s
+ * result.
+ */
+int
+dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
+{
+        uint64_t raw = 0;
+        int      ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+        /* the pointer travels through the ctx slot as an integer */
+        raw = (long) ctx;
+        ret = inode_ctx_set (inode, this, &raw);
+out:
+        return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
new file mode 100644
index 000000000..ece84151a
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -0,0 +1,1139 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+
+int dht_access2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_open2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
+
+/*
+ * Callback for the open wound by dht_open() and dht_open2().
+ *
+ * A success, a failure other than ENOENT, or any result of the second
+ * attempt (call_cnt != 1) is unwound as received. An ENOENT failure on
+ * the first attempt starts dht_rebalance_complete_check() with
+ * dht_open2 as the resume function; the error is unwound here only when
+ * that check could not be started.
+ */
+int
+dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (!op_ret || (local->call_cnt != 1))
+ goto out;
+
+ /* rebalance would have happened */
+ local->rebalance.target_op_fn = dht_open2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
+
+ return 0;
+}
+
+/*
+ * Second attempt at open(), reached through
+ * local->rebalance.target_op_fn once the migration-complete check has
+ * run.
+ *
+ * 'op_ret' is the outcome of that check. Zero re-winds open to
+ * local->cached_subvol with call_cnt set to 2, so dht_open_cbk does not
+ * retry again. Non-zero unwinds the open with ENOENT (EINVAL when the
+ * frame has no local).
+ */
+int
+dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t *local = NULL;
+        xlator_t    *subvol = NULL;
+        int          op_errno = EINVAL;
+
+        local = frame->local;
+        if (!local)
+                goto out;
+
+        op_errno = ENOENT;
+        if (op_ret)
+                goto out;
+
+        local->call_cnt = 2;
+        subvol = local->cached_subvol;
+
+        STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+                    &local->loc, local->rebalance.flags, local->fd,
+                    NULL);
+        return 0;
+
+out:
+        /* this frame carries an open fop, so it is unwound as open */
+        DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * open fop entry point.
+ *
+ * Builds the frame-local state with dht_local_init(), remembers 'flags'
+ * for a possible retry, marks this as the first attempt (call_cnt = 1)
+ * and winds open to local->cached_subvol. A failed VALIDATE_OR_GOTO
+ * check, a missing local or a missing cached subvolume unwinds the open
+ * with -1 and an errno.
+ */
+int
+dht_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ loc, flags, fd, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO check jumped here */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/*
+ * Callback for stat/fstat wound on a single subvolume (dht_stat,
+ * dht_fstat and dht_attr2 use it).
+ *
+ * Failures other than ENOENT and results of the second attempt
+ * (call_cnt != 1) are unwound as received. On the first attempt, an
+ * ENOENT failure or a stbuf matching IS_DHT_MIGRATION_PHASE2 leads
+ * either straight to dht_attr2() (when dht_inode_ctx_get1() yields a
+ * subvolume) or to dht_rebalance_complete_check() with dht_attr2 as the
+ * resume function.
+ */
+int
+dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ xlator_t *subvol = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_errno = op_errno;
+ /* Check if the rebalance phase2 is true */
+ /* (op_ret == -1 can only mean ENOENT here; other errors left above) */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
+ /* Phase 2 of migration */
+ local->rebalance.target_op_fn = dht_attr2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_attr2 (this, frame, 0);
+ return 0;
+ }
+ }
+
+out:
+ /* unwound as 'stat' for both the stat and the fstat paths */
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
+err:
+ return 0;
+}
+
+/*
+ * Second attempt at stat/fstat, called directly by dht_file_attr_cbk or
+ * as the resume function of the migration-complete check.
+ *
+ * 'op_ret' == -1 unwinds with local->op_errno (EINVAL when the frame has
+ * no local). Otherwise the fop is wound again on local->cached_subvol:
+ * fstat on local->fd when local->fop is GF_FOP_FSTAT, stat on local->loc
+ * in every other case. call_cnt is set to 2 so the callback does not
+ * retry again.
+ */
+int
+dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ subvol = local->cached_subvol;
+ local->call_cnt = 2;
+
+ if (local->fop == GF_FOP_FSTAT) {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, local->fd, NULL);
+ } else {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, &local->loc, NULL);
+ }
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/*
+ * Callback for stat/fstat fanned out to every subvolume of a layout
+ * (see dht_stat and dht_fstat).
+ *
+ * Under frame->lock, a failed reply records op_errno and a successful
+ * one is merged into local->stbuf with dht_iatt_merge() and sets
+ * local->op_ret to 0. When dht_frame_return() reports the last
+ * outstanding reply, the merged result is unwound.
+ *
+ * 'local' is read before the checks that jump to 'out', because that
+ * label dereferences it. A frame without a local cannot be counted or
+ * unwound and is left alone.
+ */
+int
+dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+        dht_local_t  *local = NULL;
+        int           this_call_cnt = 0;
+        call_frame_t *prev = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
+
+        local = frame->local;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+        prev = cookie;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "subvolume %s returned -1 (%s)",
+                                prev->this->name, strerror (op_errno));
+
+                        goto unlock;
+                }
+
+                dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+
+                local->op_ret = 0;
+        }
+unlock:
+        UNLOCK (&frame->lock);
+out:
+        this_call_cnt = dht_frame_return (frame);
+        if (is_last_call (this_call_cnt)) {
+                DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+                                  &local->stbuf, xdata);
+        }
+err:
+        return 0;
+}
+
+/*
+ * stat fop entry point.
+ *
+ * For a regular file (IA_ISREG) a single stat is wound to
+ * local->cached_subvol with dht_file_attr_cbk. For anything else the
+ * stat is wound to every subvolume listed in the inode's layout and the
+ * replies are combined by dht_attr_cbk. A failed VALIDATE_OR_GOTO check,
+ * a missing local or a missing layout unwinds with -1 and an errno.
+ */
+int
+dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_STAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ /* NOTE(review): cached_subvol is used without a NULL check
+ * here; confirm a layout always implies a cached subvolume */
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, loc, xdata);
+
+ return 0;
+ }
+
+ /* one reply is expected from each subvolume in the layout */
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->stat,
+ loc, xdata);
+ }
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO check jumped here */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+/*
+ * fstat fop entry point.
+ *
+ * For a regular file (IA_ISREG) a single fstat is wound to
+ * local->cached_subvol with dht_file_attr_cbk. For anything else the
+ * fstat is wound to every subvolume listed in the inode's layout and
+ * the replies are combined by dht_attr_cbk. A failed VALIDATE_OR_GOTO
+ * check, a missing local or a missing layout unwinds with -1 and an
+ * errno.
+ */
+int
+dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (fd->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ /* NOTE(review): cached_subvol is used without a NULL check
+ * here; confirm a layout always implies a cached subvolume */
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, fd, xdata);
+
+ return 0;
+ }
+
+ /* one reply is expected from each subvolume in the layout */
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->fstat,
+ fd, xdata);
+ }
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO check jumped here */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/*
+ * Callback for the readv wound by dht_readv() and dht_readv2().
+ *
+ * Results of the second attempt (call_cnt != 1) and failures other than
+ * ENOENT are unwound as received. On the first attempt, an ENOENT
+ * failure or a stbuf matching IS_DHT_MIGRATION_PHASE2 leads either
+ * straight to dht_readv2() (when the inode ctx of the fd's inode already
+ * names a subvolume) or to dht_rebalance_complete_check() with
+ * dht_readv2 as the resume function.
+ */
+int
+dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno,
+               struct iovec *vector, int count, struct iatt *stbuf,
+               struct iobref *iobref, dict_t *xdata)
+{
+        dht_local_t *local = NULL;
+        int          ret = 0;
+        inode_t     *inode = NULL;
+        xlator_t    *subvol = 0;
+
+        local = frame->local;
+        if (!local) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* This is already second try, no need for re-check */
+        if (local->call_cnt != 1)
+                goto out;
+
+        if ((op_ret == -1) && (op_errno != ENOENT))
+                goto out;
+
+        local->op_errno = op_errno;
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+                /* File would be migrated to other node */
+                /* readv is fd based: the ctx lives on the fd's inode */
+                inode = local->fd->inode;
+                ret = dht_inode_ctx_get1 (this, inode, &subvol);
+                if (!subvol) {
+                        local->rebalance.target_op_fn = dht_readv2;
+                        ret = dht_rebalance_complete_check (this, frame);
+                        if (!ret)
+                                return 0;
+                } else {
+                        /* value is already set in fd_ctx, that means no need
+                           to check for whether its complete or not. */
+                        dht_readv2 (this, frame, 0);
+                        return 0;
+                }
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (stbuf);
+        DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
+                          iobref, xdata);
+
+        return 0;
+}
+
+/*
+ * Second attempt at readv, called directly by dht_readv_cbk or as the
+ * resume function of the migration-complete check.
+ *
+ * 'op_ret' == -1 unwinds with local->op_errno (EINVAL when the frame has
+ * no local). Otherwise the read is wound again on local->cached_subvol
+ * using the size, offset and flags saved in local->rebalance, with
+ * call_cnt set to 2 so the callback does not retry again.
+ */
+int
+dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * readv fop entry point.
+ *
+ * Builds the frame-local state from 'fd', saves offset, size and flags
+ * in local->rebalance for a possible retry, marks this as the first
+ * attempt (call_cnt = 1) and winds readv to local->cached_subvol. A
+ * failed VALIDATE_OR_GOTO check, a missing local or a missing cached
+ * subvolume unwinds with -1 and an errno.
+ */
+int
+dht_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.offset = off;
+ local->rebalance.size = size;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_readv_cbk,
+ subvol, subvol->fops->readv,
+ fd, size, off, flags, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO check jumped here */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/*
+ * Callback for the access wound by dht_access(), dht_access2() and by
+ * this function's own directory retry.
+ *
+ * Only the first attempt (call_cnt == 1) with a usable cookie is
+ * examined. An ENOTCONN failure on a directory is retried on the next
+ * subvolume (dht_subvol_next_available) until the walk comes back to
+ * local->cached_subvol. An ENOENT failure starts
+ * dht_rebalance_complete_check() with dht_access2 as the resume
+ * function. Everything else is unwound as received.
+ */
+int
+dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!prev || !prev->this)
+ goto out;
+ if (local->call_cnt != 1)
+ goto out;
+ if ((op_ret == -1) && (op_errno == ENOTCONN) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+
+ subvol = dht_subvol_next_available (this, prev->this);
+ if (!subvol)
+ goto out;
+
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
+ }
+
+ /* call_cnt stays 1, so the next reply is examined again */
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+ return 0;
+ }
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ /* File would be migrated to other node */
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_access2;
+ ret = dht_rebalance_complete_check (frame->this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+/*
+ * Second attempt at access, run as the resume function of the
+ * migration-complete check.
+ *
+ * 'op_ret' == -1 unwinds with local->op_errno (EINVAL when the frame has
+ * no local). Otherwise access is wound again on local->cached_subvol
+ * with the mask saved in local->rebalance.flags, and call_cnt is set to
+ * 2 so the callback does not retry again.
+ */
+int
+dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+/*
+ * access fop entry point.
+ *
+ * Builds the frame-local state from 'loc', saves 'mask' in
+ * local->rebalance.flags for a possible retry, marks this as the first
+ * attempt (call_cnt = 1) and winds access to local->cached_subvol. A
+ * failed VALIDATE_OR_GOTO check, a missing local or a missing cached
+ * subvolume unwinds with -1 and an errno.
+ */
+int
+dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mask;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ loc, mask, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO check jumped here */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+/*
+ * Callback for the flush wound by dht_flush() and dht_flush2().
+ *
+ * After the first attempt (call_cnt == 1), if the inode ctx of the fd's
+ * inode names a subvolume, the flush is sent again through dht_flush2().
+ * In every other case the result is unwound as received.
+ */
+int
+dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, dict_t *xdata)
+{
+        dht_local_t *local = NULL;
+        inode_t     *inode = NULL;
+        xlator_t    *subvol = 0;
+
+        local = frame->local;
+
+        local->op_errno = op_errno;
+
+        if (local->call_cnt != 1)
+                goto out;
+
+        /* If context is set, then send flush() it to the destination */
+        /* flush is fd based: the ctx lives on the fd's inode */
+        inode = local->fd->inode;
+        dht_inode_ctx_get1 (this, inode, &subvol);
+        if (subvol) {
+                dht_flush2 (this, frame, 0);
+                return 0;
+        }
+
+out:
+        DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * Second flush, sent to the subvolume recorded in the inode ctx of the
+ * fd's inode (dht_inode_ctx_get1), falling back to local->cached_subvol
+ * when none is recorded. call_cnt is set to 2 so dht_flush_cbk unwinds
+ * the reply instead of retrying. 'op_ret' is not used.
+ */
+int
+dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, local->fd, NULL);
+
+ return 0;
+}
+
+
+/*
+ * flush fop entry point.
+ *
+ * Builds the frame-local state from 'fd', marks this as the first
+ * attempt (call_cnt = 1) and winds flush to local->cached_subvol. A
+ * failed VALIDATE_OR_GOTO check, a missing local or a missing cached
+ * subvolume unwinds with -1 and an errno.
+ */
+int
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, fd, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO check jumped here */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+/*
+ * Callback for fsync().
+ *
+ * Errors other than ENOENT, and every reply to the second attempt
+ * (call_cnt != 1), are unwound to the caller.  After the first attempt the
+ * inode context of the fd's inode is consulted: when it names a subvolume
+ * the fsync is repeated through dht_fsync2(); otherwise the migration phase
+ * flags of postbuf select a rebalance check, and a zero return from that
+ * check means the frame is not unwound here.
+ */
+int
+dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+               int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+               dict_t *xdata)
+{
+        dht_local_t  *local  = NULL;
+        call_frame_t *prev   = NULL;
+        int           ret    = -1;
+        inode_t      *inode  = NULL;
+        xlator_t     *subvol = NULL;
+
+        local = frame->local;
+        prev = cookie;
+
+        local->op_errno = op_errno;
+        if (op_ret == -1 && (op_errno != ENOENT)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "subvolume %s returned -1 (%s)",
+                        prev->this->name, strerror (op_errno));
+                goto out;
+        }
+
+        if (local->call_cnt != 1) {
+                if (local->stbuf.ia_blocks) {
+                        dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+                        dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+                }
+                goto out;
+        }
+
+        /* Consult the context of the inode being synced, the same inode
+         * dht_fsync2() uses.  Without this assignment the lookup ran
+         * against a NULL inode and the dht_fsync2() branch below was
+         * unreachable. */
+        inode = local->fd->inode;
+
+        dht_inode_ctx_get1 (this, inode, &subvol);
+        if (!subvol) {
+                local->rebalance.target_op_fn = dht_fsync2;
+
+                /* Check if the rebalance phase1 is true */
+                if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+                        dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+                        dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+                        ret = dht_rebalance_in_progress_check (this, frame);
+                }
+
+                /* Check if the rebalance phase2 is true */
+                if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+                        ret = dht_rebalance_complete_check (this, frame);
+                }
+                if (!ret)
+                        return 0;
+        } else {
+                dht_fsync2 (this, frame, 0);
+                return 0;
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (postbuf);
+        DHT_STRIP_PHASE1_FLAGS (prebuf);
+        DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
+                          prebuf, postbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * Second attempt of fsync(): re-sends the request, using the flags kept in
+ * local->rebalance.flags, to the subvolume named by the inode context of
+ * the fd's inode, or to the cached subvolume when the context names none.
+ */
+int
+dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t *local  = frame->local;
+        xlator_t    *subvol = NULL;
+
+        dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+        if (subvol == NULL)
+                subvol = local->cached_subvol;
+
+        /* second attempt */
+        local->call_cnt = 2;
+
+        STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+                    local->fd, local->rebalance.flags, NULL);
+
+        return 0;
+}
+
+/*
+ * fsync() entry point: records the datasync flag for a possible second
+ * attempt and winds the fsync to the cached subvolume of the fd.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int
+dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+           dict_t *xdata)
+{
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+        dht_local_t *local    = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC);
+        if (!local) {
+                op_errno = ENOMEM;
+
+                goto err;
+        }
+
+        /* Refuse to wind through a NULL subvolume, as the sibling fd-based
+         * fops (flush, writev, ...) do. */
+        subvol = local->cached_subvol;
+        if (!subvol) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local->call_cnt = 1;
+        local->rebalance.flags = datasync;
+
+        STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+                    fd, datasync, xdata);
+
+        return 0;
+
+err:
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
+ indicate that lock migration happened on the fd, so we can consider it as
+ phase 2 of migration */
+int
+dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
+
+ return 0;
+}
+
+
+/*
+ * lk() entry point: winds the lock request to the cached subvolume of the
+ * fd's inode.  Unwinds with -1 when an argument is missing or when no
+ * cached subvolume is found (EINVAL).
+ */
+int
+dht_lk (call_frame_t *frame, xlator_t *this,
+        fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
+{
+        int       op_errno = -1;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        subvol = dht_subvol_get_cached (this, fd->inode);
+        if (subvol != NULL) {
+                /* TODO: for rebalance, we need to preserve the fop
+                   arguments */
+                STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
+                            cmd, flock, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for fd=%p", fd);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback for readlink().  Symlinks are currently not migrated, so no
+ * migration check is needed here.
+ *
+ * A successful reply on a frame that has no local is turned into an EINVAL
+ * failure; everything else is unwound unchanged.
+ */
+int
+dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, const char *path,
+                  struct iatt *stbuf, dict_t *xdata)
+{
+        if ((op_ret != -1) && (frame->local == NULL)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+        }
+
+        DHT_STRIP_PHASE1_FLAGS (stbuf);
+        DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+
+        return 0;
+}
+
+
+/*
+ * readlink() entry point: winds the request to the cached subvolume of
+ * loc.  Unwinds with -1 when an argument is missing, when dht_local_init()
+ * fails (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int
+dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+              dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (subvol != NULL) {
+                STACK_WIND (frame, dht_readlink_cbk,
+                            subvol, subvol->fops->readlink,
+                            loc, size, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for path=%s", loc->path);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/* Currently no translators on top of 'distribute' will be using
+ * below fops, hence not implementing 'migration' related checks
+ */
+
+/* Callback for xattrop(): passes the reply arguments straight on to
+ * DHT_STACK_UNWIND. */
+int
+dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+        DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
+
+        return 0;
+}
+
+
+/*
+ * xattrop() entry point: winds the request to the cached subvolume of loc.
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int
+dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (subvol != NULL) {
+                local->call_cnt = 1;
+
+                STACK_WIND (frame,
+                            dht_xattrop_cbk,
+                            subvol, subvol->fops->xattrop,
+                            loc, flags, dict, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for path=%s", loc->path);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/* Callback for fxattrop(): passes the reply arguments straight on to
+ * DHT_STACK_UNWIND. */
+int
+dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+        DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
+
+        return 0;
+}
+
+
+/*
+ * fxattrop() entry point: winds the request to the cached subvolume of the
+ * fd's inode.  Unwinds with -1 when an argument is missing or when no
+ * cached subvolume is found (EINVAL).
+ */
+int
+dht_fxattrop (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+        int       op_errno = -1;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        subvol = dht_subvol_get_cached (this, fd->inode);
+        if (subvol != NULL) {
+                STACK_WIND (frame,
+                            dht_fxattrop_cbk,
+                            subvol, subvol->fops->fxattrop,
+                            fd, flags, dict, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for fd=%p", fd);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/* Callback for inodelk(): passes the reply arguments straight on to
+ * DHT_STACK_UNWIND. */
+int
+dht_inodelk_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int32_t op_ret, int32_t op_errno,
+                 dict_t *xdata)
+{
+        DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+
+/*
+ * inodelk() entry point: winds the lock request to the cached subvolume of
+ * loc.  Unwinds with -1 when an argument is missing, when dht_local_init()
+ * fails (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int32_t
+dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+             loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (subvol != NULL) {
+                local->call_cnt = 1;
+
+                STACK_WIND (frame,
+                            dht_inodelk_cbk,
+                            subvol, subvol->fops->inodelk,
+                            volume, loc, cmd, lock, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for path=%s", loc->path);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/* Callback for finodelk(): passes the reply arguments straight on to
+ * DHT_STACK_UNWIND. */
+int
+dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+
+/*
+ * finodelk() entry point: winds the lock request to the cached subvolume
+ * of the fd's inode.  Unwinds with -1 when an argument is missing or when
+ * no cached subvolume is found (EINVAL).
+ */
+int
+dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+              fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+        int       op_errno = -1;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        subvol = dht_subvol_get_cached (this, fd->inode);
+        if (subvol != NULL) {
+                STACK_WIND (frame, dht_finodelk_cbk, subvol,
+                            subvol->fops->finodelk,
+                            volume, fd, cmd, lock, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for fd=%p", fd);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+
+        return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
new file mode 100644
index 000000000..4b3f3a049
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -0,0 +1,1013 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+
+/* Second-attempt handlers; the callbacks in this file store them in
+ * local->rebalance.target_op_fn before running a rebalance check. */
+int dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_fallocate2 (xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_discard2 (xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_zerofill2 (xlator_t *this, call_frame_t *frame, int op_ret);
+
+/*
+ * Callback for writev().
+ *
+ * Errors other than ENOENT, a frame without local (reported as EINVAL) and
+ * every reply to the second attempt are unwound to the caller.  After the
+ * first attempt the migration phase flags of postbuf decide whether the
+ * write is repeated through dht_writev2() or handed to a rebalance check;
+ * a zero return from a check means the frame is not unwound here.
+ */
+int
+dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno, struct iatt *prebuf,
+                struct iatt *postbuf, dict_t *xdata)
+{
+        dht_local_t *local  = NULL;
+        xlator_t    *subvol = NULL;
+
+        if ((op_ret == -1) && (op_errno != ENOENT))
+                goto out;
+
+        local = frame->local;
+        if (local == NULL) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (local->call_cnt != 1) {
+                /* preserve the modes of source */
+                if (local->stbuf.ia_blocks) {
+                        dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+                        dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+                }
+                goto out;
+        }
+
+        local->rebalance.target_op_fn = dht_writev2;
+        local->op_errno = op_errno;
+
+        /* Phase 2 of migration */
+        if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+                if (dht_rebalance_complete_check (this, frame) == 0)
+                        return 0;
+        }
+
+        /* Check if the rebalance phase1 is true */
+        if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+                dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+                dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+                dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+                if (subvol != NULL) {
+                        dht_writev2 (this, frame, 0);
+                        return 0;
+                }
+
+                if (dht_rebalance_in_progress_check (this, frame) == 0)
+                        return 0;
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (postbuf);
+        DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+        DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+                          xdata);
+
+        return 0;
+}
+
+/*
+ * Second attempt of writev(): replays the write from the arguments kept in
+ * local->rebalance on the subvolume named by the inode context of the fd's
+ * inode, or on the cached subvolume when the context names none.
+ */
+int
+dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t *local  = frame->local;
+        xlator_t    *subvol = NULL;
+
+        dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+        if (subvol == NULL)
+                subvol = local->cached_subvol;
+
+        /* second attempt */
+        local->call_cnt = 2;
+
+        STACK_WIND (frame, dht_writev_cbk,
+                    subvol, subvol->fops->writev,
+                    local->fd, local->rebalance.vector, local->rebalance.count,
+                    local->rebalance.offset, local->rebalance.flags,
+                    local->rebalance.iobref, NULL);
+
+        return 0;
+}
+
+/*
+ * writev() entry point: keeps a copy of the write arguments in
+ * local->rebalance (dht_writev2() replays them on a second attempt) and
+ * winds the write to the cached subvolume of the fd.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() or the
+ * copy of the I/O vector fails (ENOMEM), or when the local has no cached
+ * subvolume (EINVAL).
+ */
+int
+dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+            struct iovec *vector, int count, off_t off, uint32_t flags,
+            struct iobref *iobref, dict_t *xdata)
+{
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+        dht_local_t *local    = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
+        if (!local) {
+
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (!subvol) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* NOTE(review): iov_dup() presumably returns NULL when it cannot
+         * allocate the copy - confirm.  Fail here rather than let a later
+         * dht_writev2() replay a NULL vector. */
+        local->rebalance.vector = iov_dup (vector, count);
+        if (!local->rebalance.vector) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->rebalance.offset = off;
+        local->rebalance.count = count;
+        local->rebalance.flags = flags;
+        local->rebalance.iobref = iobref_ref (iobref);
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_writev_cbk,
+                    subvol, subvol->fops->writev,
+                    fd, vector, count, off, flags, iobref, xdata);
+
+        return 0;
+
+err:
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+
+/*
+ * Callback shared by truncate() and ftruncate().
+ *
+ * Errors other than ENOENT and every reply to the second attempt are
+ * unwound to the caller.  After the first attempt, a failed reply or the
+ * migration phase flags of postbuf decide whether the operation is repeated
+ * through dht_truncate2() or handed to a rebalance check; a zero return
+ * from a check means the frame is not unwound here.
+ */
+int
+dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+        dht_local_t  *local  = NULL;
+        call_frame_t *prev   = NULL;
+        xlator_t     *subvol = NULL;
+        inode_t      *inode  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+        GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+        local = frame->local;
+        prev = cookie;
+
+        if ((op_ret == -1) && (op_errno != ENOENT)) {
+                local->op_errno = op_errno;
+                local->op_ret = -1;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "subvolume %s returned -1 (%s)",
+                        prev->this->name, strerror (op_errno));
+
+                goto out;
+        }
+
+        if (local->call_cnt != 1) {
+                if (local->stbuf.ia_blocks) {
+                        dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+                        dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+                }
+                goto out;
+        }
+
+        local->rebalance.target_op_fn = dht_truncate2;
+        local->op_errno = op_errno;
+
+        /* Phase 2 of migration */
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+                if (dht_rebalance_complete_check (this, frame) == 0)
+                        return 0;
+        }
+
+        /* Check if the rebalance phase1 is true */
+        if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+                dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+                dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+                /* ftruncate() carries an fd, truncate() a loc */
+                if (local->fd)
+                        inode = local->fd->inode;
+                else
+                        inode = local->loc.inode;
+
+                dht_inode_ctx_get1 (this, inode, &subvol);
+                if (subvol != NULL) {
+                        dht_truncate2 (this, frame, 0);
+                        return 0;
+                }
+
+                if (dht_rebalance_in_progress_check (this, frame) == 0)
+                        return 0;
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (postbuf);
+        DHT_STRIP_PHASE1_FLAGS (prebuf);
+        DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
+                          prebuf, postbuf, xdata);
+err:
+        return 0;
+}
+
+
+/*
+ * Second attempt of truncate()/ftruncate(): repeats the operation, with the
+ * offset kept in local->rebalance.offset, on the subvolume named by the
+ * inode context, or on the cached subvolume when the context names none.
+ * local->fop selects between the path-based and the fd-based call.
+ */
+int
+dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t *local  = frame->local;
+        xlator_t    *subvol = NULL;
+        inode_t     *inode  = NULL;
+
+        if (local->fd)
+                inode = local->fd->inode;
+        else
+                inode = local->loc.inode;
+
+        dht_inode_ctx_get1 (this, inode, &subvol);
+        if (subvol == NULL)
+                subvol = local->cached_subvol;
+
+        /* second attempt */
+        local->call_cnt = 2;
+
+        if (local->fop != GF_FOP_TRUNCATE) {
+                STACK_WIND (frame, dht_truncate_cbk, subvol,
+                            subvol->fops->ftruncate, local->fd,
+                            local->rebalance.offset, NULL);
+        } else {
+                STACK_WIND (frame, dht_truncate_cbk, subvol,
+                            subvol->fops->truncate, &local->loc,
+                            local->rebalance.offset, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * truncate() entry point: records the offset for a possible second attempt
+ * and winds the request to the cached subvolume of loc.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int
+dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+              dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->rebalance.offset = offset;
+        local->call_cnt = 1;
+
+        subvol = local->cached_subvol;
+        if (subvol != NULL) {
+                STACK_WIND (frame, dht_truncate_cbk,
+                            subvol, subvol->fops->truncate,
+                            loc, offset, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for path=%s", loc->path);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * ftruncate() entry point: records the offset for a possible second attempt
+ * and winds the request to the cached subvolume of the fd.  The reply is
+ * handled by dht_truncate_cbk.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int
+dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+               dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->rebalance.offset = offset;
+        local->call_cnt = 1;
+
+        subvol = local->cached_subvol;
+        if (subvol != NULL) {
+                STACK_WIND (frame, dht_truncate_cbk,
+                            subvol, subvol->fops->ftruncate,
+                            fd, offset, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for fd=%p", fd);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for fallocate().
+ *
+ * Errors other than ENOENT and every reply to the second attempt are
+ * unwound to the caller.  After the first attempt, a failed reply or the
+ * migration phase flags of postbuf decide whether the operation is repeated
+ * through dht_fallocate2() or handed to a rebalance check; a zero return
+ * from a check means the frame is not unwound here.
+ */
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+        dht_local_t  *local  = NULL;
+        call_frame_t *prev   = NULL;
+        xlator_t     *subvol = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+        GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+        local = frame->local;
+        prev = cookie;
+
+        if ((op_ret == -1) && (op_errno != ENOENT)) {
+                local->op_errno = op_errno;
+                local->op_ret = -1;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "subvolume %s returned -1 (%s)",
+                        prev->this->name, strerror (op_errno));
+
+                goto out;
+        }
+
+        if (local->call_cnt != 1) {
+                if (local->stbuf.ia_blocks) {
+                        dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+                        dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+                }
+                goto out;
+        }
+
+        local->rebalance.target_op_fn = dht_fallocate2;
+
+        /* Phase 2 of migration */
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+                if (dht_rebalance_complete_check (this, frame) == 0)
+                        return 0;
+        }
+
+        /* Check if the rebalance phase1 is true */
+        if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+                dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+                dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+                dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+                if (subvol != NULL) {
+                        dht_fallocate2 (this, frame, 0);
+                        return 0;
+                }
+
+                if (dht_rebalance_in_progress_check (this, frame) == 0)
+                        return 0;
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (postbuf);
+        DHT_STRIP_PHASE1_FLAGS (prebuf);
+        DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
+                          prebuf, postbuf, xdata);
+err:
+        return 0;
+}
+
+/*
+ * Second attempt of fallocate(): repeats the operation, with the mode,
+ * offset and size kept in local->rebalance, on the subvolume named by the
+ * inode context of the fd's inode, or on the cached subvolume when the
+ * context names none.
+ */
+int
+dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t *local  = frame->local;
+        xlator_t    *subvol = NULL;
+
+        dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+        if (subvol == NULL)
+                subvol = local->cached_subvol;
+
+        /* second attempt */
+        local->call_cnt = 2;
+
+        STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate,
+                   local->fd, local->rebalance.flags, local->rebalance.offset,
+                   local->rebalance.size, NULL);
+
+        return 0;
+}
+
+/*
+ * fallocate() entry point: records mode, offset and length for a possible
+ * second attempt and winds the request to the cached subvolume of the fd.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+              off_t offset, size_t len, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->rebalance.flags = mode;
+        local->rebalance.offset = offset;
+        local->rebalance.size = len;
+        local->call_cnt = 1;
+
+        subvol = local->cached_subvol;
+        if (subvol != NULL) {
+                STACK_WIND (frame, dht_fallocate_cbk,
+                            subvol, subvol->fops->fallocate,
+                            fd, mode, offset, len, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for fd=%p", fd);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for discard().
+ *
+ * Errors other than ENOENT and every reply to the second attempt are
+ * unwound to the caller.  After the first attempt, a failed reply or the
+ * migration phase flags of postbuf decide whether the operation is repeated
+ * through dht_discard2() or handed to a rebalance check; a zero return
+ * from a check means the frame is not unwound here.
+ */
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno, struct iatt *prebuf,
+                struct iatt *postbuf, dict_t *xdata)
+{
+        dht_local_t  *local  = NULL;
+        call_frame_t *prev   = NULL;
+        xlator_t     *subvol = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+        GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+        local = frame->local;
+        prev = cookie;
+
+        if ((op_ret == -1) && (op_errno != ENOENT)) {
+                local->op_errno = op_errno;
+                local->op_ret = -1;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "subvolume %s returned -1 (%s)",
+                        prev->this->name, strerror (op_errno));
+
+                goto out;
+        }
+
+        if (local->call_cnt != 1) {
+                if (local->stbuf.ia_blocks) {
+                        dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+                        dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+                }
+                goto out;
+        }
+
+        local->rebalance.target_op_fn = dht_discard2;
+
+        /* Phase 2 of migration */
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+                if (dht_rebalance_complete_check (this, frame) == 0)
+                        return 0;
+        }
+
+        /* Check if the rebalance phase1 is true */
+        if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+                dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+                dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+                dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+                if (subvol != NULL) {
+                        dht_discard2 (this, frame, 0);
+                        return 0;
+                }
+
+                if (dht_rebalance_in_progress_check (this, frame) == 0)
+                        return 0;
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (postbuf);
+        DHT_STRIP_PHASE1_FLAGS (prebuf);
+        DHT_STACK_UNWIND (discard, frame, op_ret, op_errno,
+                          prebuf, postbuf, xdata);
+err:
+        return 0;
+}
+
+/*
+ * Second attempt of discard(): repeats the operation, with the offset and
+ * size kept in local->rebalance, on the subvolume named by the inode
+ * context of the fd's inode, or on the cached subvolume when the context
+ * names none.
+ */
+int
+dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t *local  = frame->local;
+        xlator_t    *subvol = NULL;
+
+        dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+        if (subvol == NULL)
+                subvol = local->cached_subvol;
+
+        /* second attempt */
+        local->call_cnt = 2;
+
+        STACK_WIND(frame, dht_discard_cbk, subvol, subvol->fops->discard,
+                   local->fd, local->rebalance.offset, local->rebalance.size,
+                   NULL);
+
+        return 0;
+}
+
+/*
+ * discard() entry point: records offset and length for a possible second
+ * attempt and winds the request to the cached subvolume of the fd.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            size_t len, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_DISCARD);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->rebalance.offset = offset;
+        local->rebalance.size = len;
+        local->call_cnt = 1;
+
+        subvol = local->cached_subvol;
+        if (subvol != NULL) {
+                STACK_WIND (frame, dht_discard_cbk, subvol,
+                            subvol->fops->discard,
+                            fd, offset, len, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for fd=%p", fd);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback for zerofill().
+ *
+ * Errors other than ENOENT and every reply to the second attempt are
+ * unwound to the caller.  After the first attempt, a failed reply or the
+ * migration phase flags of postbuf decide whether the operation is repeated
+ * through dht_zerofill2() or handed to a rebalance check; a zero return
+ * from a check means the frame is not unwound here.
+ */
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+        dht_local_t  *local  = NULL;
+        call_frame_t *prev   = NULL;
+        int           ret    = -1;
+        xlator_t     *subvol = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+        GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+        local = frame->local;
+        prev = cookie;
+
+        if ((op_ret == -1) && (op_errno != ENOENT)) {
+                local->op_errno = op_errno;
+                local->op_ret = -1;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "subvolume %s returned -1 (%s)",
+                        prev->this->name, strerror (op_errno));
+                goto out;
+        }
+
+        if (local->call_cnt != 1) {
+                if (local->stbuf.ia_blocks) {
+                        dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+                        dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+                }
+                goto out;
+        }
+        local->rebalance.target_op_fn = dht_zerofill2;
+        /* Phase 2 of migration */
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+                ret = dht_rebalance_complete_check (this, frame);
+                if (!ret)
+                        return 0;
+        }
+
+        /* Check if the rebalance phase1 is true */
+        if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+                dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+                dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+                /* Consult the inode context, as the writev, truncate,
+                 * fallocate and discard callbacks in this file do, instead
+                 * of the fd context. */
+                dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+                if (subvol) {
+                        dht_zerofill2 (this, frame, 0);
+                        return 0;
+                }
+                ret = dht_rebalance_in_progress_check (this, frame);
+                if (!ret)
+                        return 0;
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (postbuf);
+        DHT_STRIP_PHASE1_FLAGS (prebuf);
+        DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
+                          prebuf, postbuf, xdata);
+err:
+        return 0;
+}
+
+/*
+ * Second attempt of zerofill(): repeats the operation, with the offset and
+ * size kept in local->rebalance, on the subvolume named by the inode
+ * context of the fd's inode, or on the cached subvolume when the context
+ * names none.
+ */
+int
+dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t *local  = NULL;
+        xlator_t    *subvol = NULL;
+
+        local = frame->local;
+
+        /* Consult the inode context, as the other *2 handlers in this file
+         * do, instead of the fd context. */
+        if (local->fd)
+                dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+        if (!subvol)
+                subvol = local->cached_subvol;
+
+        local->call_cnt = 2; /* This is the second attempt */
+
+        STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+                   local->fd, local->rebalance.offset, local->rebalance.size,
+                   NULL);
+
+        return 0;
+}
+
+/*
+ * zerofill() entry point: records offset and length for a possible second
+ * attempt and winds the request to the cached subvolume of the fd.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM) or when the local has no cached subvolume (EINVAL).
+ */
+int
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             size_t len, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->rebalance.offset = offset;
+        local->rebalance.size = len;
+        local->call_cnt = 1;
+
+        subvol = local->cached_subvol;
+        if (subvol != NULL) {
+                STACK_WIND (frame, dht_zerofill_cbk, subvol,
+                            subvol->fops->zerofill,
+                            fd, offset, len, xdata);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "no cached subvolume for fd=%p", fd);
+        op_errno = EINVAL;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+
+/*
+ * Callback for setattr()/fsetattr() on regular files; handles the migration
+ * cases.
+ *
+ * Errors other than ENOENT and every reply to the second attempt are
+ * unwound to the caller.  After the first attempt, a failed reply or a
+ * postbuf flagged as migration phase 2 triggers the rebalance-complete
+ * check; a zero return from it means the frame is not unwound here.
+ */
+int
+dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int op_ret, int op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        dht_local_t  *local = frame->local;
+        call_frame_t *prev  = cookie;
+
+        local->op_errno = op_errno;
+        if ((op_ret == -1) && (op_errno != ENOENT)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "subvolume %s returned -1 (%s)",
+                        prev->this->name, strerror (op_errno));
+                goto out;
+        }
+
+        if (local->call_cnt == 1) {
+                local->rebalance.target_op_fn = dht_setattr2;
+
+                /* Phase 2 of migration */
+                if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+                        if (dht_rebalance_complete_check (this, frame) == 0)
+                                return 0;
+                }
+
+                /* At the end of the migration process, whatever 'attr' we
+                   have on source file will be migrated to destination file
+                   in one shot, hence we don't need to check for in progress
+                   state here (ie, PHASE1) */
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (postbuf);
+        DHT_STRIP_PHASE1_FLAGS (prebuf);
+        DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+                          prebuf, postbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * Second attempt of setattr()/fsetattr(): repeats the operation, with the
+ * iatt and flags kept in local->rebalance, on the subvolume named by the
+ * inode context, or on the cached subvolume when the context names none.
+ * local->fop selects between the path-based and the fd-based call.
+ */
+int
+dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t *local  = frame->local;
+        xlator_t    *subvol = NULL;
+        inode_t     *inode  = NULL;
+
+        if (local->fd)
+                inode = local->fd->inode;
+        else
+                inode = local->loc.inode;
+
+        dht_inode_ctx_get1 (this, inode, &subvol);
+        if (subvol == NULL)
+                subvol = local->cached_subvol;
+
+        /* second attempt */
+        local->call_cnt = 2;
+
+        if (local->fop != GF_FOP_SETATTR) {
+                STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+                            subvol->fops->fsetattr, local->fd,
+                            &local->rebalance.stbuf, local->rebalance.flags,
+                            NULL);
+        } else {
+                STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+                            subvol->fops->setattr, &local->loc,
+                            &local->rebalance.stbuf, local->rebalance.flags,
+                            NULL);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback for setattr()/fsetattr() on everything that is not a regular
+ * file (the existing behaviour is kept for those cases).
+ *
+ * Under the frame lock each reply either records its errno (failure) or is
+ * merged into local->prebuf / local->stbuf and marks the operation as
+ * successful.  The merged result is unwound once dht_frame_return()
+ * reports the last outstanding call.
+ */
+int
+dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, struct iatt *statpre,
+                 struct iatt *statpost, dict_t *xdata)
+{
+        dht_local_t  *local         = frame->local;
+        call_frame_t *prev          = cookie;
+        int           this_call_cnt = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "subvolume %s returned -1 (%s)",
+                                prev->this->name, strerror (op_errno));
+                } else {
+                        dht_iatt_merge (this, &local->prebuf, statpre,
+                                        prev->this);
+                        dht_iatt_merge (this, &local->stbuf, statpost,
+                                        prev->this);
+
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        this_call_cnt = dht_frame_return (frame);
+        if (is_last_call (this_call_cnt))
+                DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
+                                  &local->prebuf, &local->stbuf, xdata);
+
+        return 0;
+}
+
+
+/*
+ * setattr() entry point.
+ *
+ * For a regular file the request goes to the cached subvolume only, with
+ * the iatt and valid mask saved in local->rebalance for a possible second
+ * attempt (dht_file_setattr_cbk handles the reply).  For every other inode
+ * type the request is wound to each subvolume in the layout and the replies
+ * are gathered by dht_setattr_cbk.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM), or when the layout is missing or not sane or a regular file has
+ * no cached subvolume (EINVAL).
+ */
+int
+dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        xlator_t     *subvol   = NULL;
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        int           op_errno = -1;
+        int           i        = -1;
+        int           call_cnt = 0;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        layout = local->layout;
+        if (!layout) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no layout for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "layout is not sane for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (IA_ISREG (loc->inode->ia_type)) {
+                /* in the regular file _cbk(), we need to check for
+                   migration possibilities */
+                local->rebalance.stbuf = *stbuf;
+                local->rebalance.flags = valid;
+                local->call_cnt = 1;
+
+                /* Refuse to wind through a NULL subvolume, as the other
+                 * single-subvolume fops in this file do. */
+                subvol = local->cached_subvol;
+                if (!subvol) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "no cached subvolume for path=%s", loc->path);
+                        op_errno = EINVAL;
+                        goto err;
+                }
+
+                STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+                            subvol->fops->setattr,
+                            loc, stbuf, valid, xdata);
+
+                return 0;
+        }
+
+        local->call_cnt = call_cnt = layout->cnt;
+
+        for (i = 0; i < call_cnt; i++) {
+                STACK_WIND (frame, dht_setattr_cbk,
+                            layout->list[i].xlator,
+                            layout->list[i].xlator->fops->setattr,
+                            loc, stbuf, valid, xdata);
+        }
+
+        return 0;
+
+err:
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fsetattr() entry point.
+ *
+ * For a regular file the request goes to the cached subvolume only, with
+ * the iatt and valid mask saved in local->rebalance for a possible second
+ * attempt (dht_file_setattr_cbk handles the reply).  For every other inode
+ * type the request is wound to each subvolume in the layout and the replies
+ * are gathered by dht_setattr_cbk.
+ *
+ * Unwinds with -1 when an argument is missing, when dht_local_init() fails
+ * (ENOMEM), or when the layout is missing or not sane or a regular file has
+ * no cached subvolume (EINVAL).
+ */
+int
+dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+              int32_t valid, dict_t *xdata)
+{
+        xlator_t     *subvol   = NULL;
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        int           op_errno = -1;
+        int           i        = -1;
+        int           call_cnt = 0;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        layout = local->layout;
+        if (!layout) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no layout for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "layout is not sane for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (IA_ISREG (fd->inode->ia_type)) {
+                /* in the regular file _cbk(), we need to check for
+                   migration possibilities */
+                local->rebalance.stbuf = *stbuf;
+                local->rebalance.flags = valid;
+                local->call_cnt = 1;
+
+                /* Refuse to wind through a NULL subvolume, as the other
+                 * single-subvolume fops in this file do. */
+                subvol = local->cached_subvol;
+                if (!subvol) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "no cached subvolume for fd=%p", fd);
+                        op_errno = EINVAL;
+                        goto err;
+                }
+
+                STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+                            subvol->fops->fsetattr,
+                            fd, stbuf, valid, xdata);
+
+                return 0;
+        }
+
+        local->call_cnt = call_cnt = layout->cnt;
+
+        for (i = 0; i < call_cnt; i++) {
+                STACK_WIND (frame, dht_setattr_cbk,
+                            layout->list[i].xlator,
+                            layout->list[i].xlator->fops->fsetattr,
+                            fd, stbuf, valid, xdata);
+        }
+
+        return 0;
+
+err:
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 7e1f7afda..38e9970a7 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -68,9 +59,7 @@ dht_layout_t *
dht_layout_get (xlator_t *this, inode_t *inode)
{
dht_conf_t *conf = NULL;
- uint64_t layout_int = 0;
dht_layout_t *layout = NULL;
- int ret = -1;
conf = this->private;
if (!conf)
@@ -78,9 +67,8 @@ dht_layout_get (xlator_t *this, inode_t *inode)
LOCK (&conf->layout_lock);
{
- ret = inode_ctx_get (inode, this, &layout_int);
- if (ret == 0) {
- layout = (dht_layout_t *) (unsigned long) layout_int;
+ dht_inode_ctx_layout_get (inode, this, &layout);
+ if (layout) {
layout->ref++;
}
}
@@ -98,7 +86,6 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
int oldret = -1;
int ret = 0;
dht_layout_t *old_layout;
- uint64_t old_layout_int;
conf = this->private;
if (!conf)
@@ -106,16 +93,13 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
LOCK (&conf->layout_lock);
{
- oldret = inode_ctx_get (inode, this, &old_layout_int);
-
+ oldret = dht_inode_ctx_layout_get (inode, this, &old_layout);
layout->ref++;
- ret = inode_ctx_put (inode, this, (uint64_t) (unsigned long)
- layout);
+ dht_inode_ctx_layout_set (inode, this, layout);
}
UNLOCK (&conf->layout_lock);
- if (oldret == 0) {
- old_layout = (dht_layout_t *) (unsigned long) old_layout_int;
+ if (!oldret) {
dht_layout_unref (this, old_layout);
}
@@ -130,7 +114,7 @@ dht_layout_unref (xlator_t *this, dht_layout_t *layout)
dht_conf_t *conf = NULL;
int ref = 0;
- if (layout->preset || !this->private)
+ if (!layout || layout->preset || !this->private)
return;
conf = this->private;
@@ -174,9 +158,9 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
int ret = 0;
- ret = dht_hash_compute (layout->type, name, &hash);
+ ret = dht_hash_compute (this, layout->type, name, &hash);
if (ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"hash computation failed for type=%d name=%s",
layout->type, name);
goto out;
@@ -191,7 +175,7 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
}
if (!subvol) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"no subvolume for hash (value) = %u", hash);
}
@@ -280,6 +264,9 @@ dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
if (disk_layout_p)
*disk_layout_p = disk_layout;
+ else
+ GF_FREE (disk_layout);
+
ret = 0;
out:
@@ -289,7 +276,7 @@ out:
int
dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw)
+ int pos, void *disk_layout_raw, int disk_layout_len)
{
int cnt = 0;
int type = 0;
@@ -297,19 +284,38 @@ dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
int stop_off = 0;
int disk_layout[4];
- /* TODO: assert disk_layout_ptr is of required length */
+ if (!disk_layout_raw) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "error no layout on disk for merge");
+ return -1;
+ }
- memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
+ GF_ASSERT (disk_layout_len == sizeof (disk_layout));
+
+ memcpy (disk_layout, disk_layout_raw, disk_layout_len);
cnt = ntoh32 (disk_layout[0]);
if (cnt != 1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_ERROR,
"disk layout has invalid count %d", cnt);
return -1;
}
- /* TODO: assert type is compatible */
- type = ntoh32 (disk_layout[1]);
+ type = ntoh32 (disk_layout[1]);
+ switch (type) {
+ case DHT_HASH_TYPE_DM_USER:
+ gf_log (this->name, GF_LOG_DEBUG, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
+ case DHT_HASH_TYPE_DM:
+ break;
+ default:
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Catastrophic error layout with unknown type found %d",
+ disk_layout[1]);
+ return -1;
+ }
+
start_off = ntoh32 (disk_layout[2]);
stop_off = ntoh32 (disk_layout[3]);
@@ -329,11 +335,12 @@ int
dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
int op_ret, int op_errno, dict_t *xattr)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- void *disk_layout_raw = NULL;
-
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
if (op_ret != 0) {
err = op_errno;
@@ -354,12 +361,12 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (xattr) {
/* during lookup and not mkdir */
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- &disk_layout_raw);
+ ret = dict_get_ptr_and_len (xattr, conf->xattr_name,
+ &disk_layout_raw, &disk_layout_len);
}
if (ret != 0) {
- layout->list[i].err = -1;
+ layout->list[i].err = 0;
gf_log (this->name, GF_LOG_TRACE,
"missing disk layout on %s. err = %d",
subvol->name, err);
@@ -367,9 +374,10 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
goto out;
}
- ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw);
+ ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
+ disk_layout_len);
if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"layout merge from subvolume %s failed",
subvol->name);
goto out;
@@ -405,6 +413,22 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
layout->list[j].err = err_swap;
}
+void
+dht_layout_range_swap (dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+}
+
int64_t
dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
{
@@ -412,17 +436,37 @@ dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
layout->list[j].xlator->name));
}
+
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator)
+{
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, xlator->name))
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
int64_t
dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
{
int64_t diff = 0;
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t) layout->list[i].stop
+ - (int64_t) layout->list[j].stop;
+ goto out;
+ }
if (layout->list[i].err || layout->list[j].err)
diff = layout->list[i].err - layout->list[j].err;
else
diff = (int64_t) layout->list[i].start
- (int64_t) layout->list[j].start;
+out:
return diff;
}
@@ -471,7 +515,8 @@ dht_layout_sort_volname (dht_layout_t *layout)
int
dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
uint32_t overlaps = 0;
uint32_t missing = 0;
@@ -484,30 +529,38 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t prev_stop = 0;
uint32_t last_stop = 0;
char is_virgin = 1;
+ uint32_t no_space = 0;
- /* TODO: explain WTF is happening */
+ /* TODO: explain what is happening */
last_stop = layout->list[0].start - 1;
prev_stop = last_stop;
for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- missing++;
- break;
- case ENOTCONN:
- down++;
- break;
- case ENOSPC:
- down++;
- break;
- default:
- misc++;
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a non
+ * participating subvolume(spread-cnt). Then, do not
+ * check for anomalies. If start != stop, then treat it
+ * as misc err */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
}
+ break;
+ default:
+ misc++;
continue;
- }
+ }
is_virgin = 0;
@@ -540,6 +593,9 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
if (misc_p)
*misc_p = misc;
+ if (no_space_p)
+ *no_space_p = no_space;
+
return ret;
}
@@ -555,7 +611,6 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
uint32_t down = 0;
uint32_t misc = 0;
-
ret = dht_layout_sort (layout);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
@@ -565,7 +620,7 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
ret = dht_layout_anomalies (this, loc, layout,
&holes, &overlaps,
- &missing, &down, &misc);
+ &missing, &down, &misc, NULL);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"error while finding anomalies in %s -- not good news",
@@ -583,43 +638,56 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
"found anomalies in %s. holes=%d overlaps=%d",
loc->path, holes, overlaps);
}
- ret = 1;
+ ret = -1;
}
for (i = 0; i < layout->cnt; i++) {
- /* TODO During DHT selfheal rewrite (almost) find a better place to
- * detect this - probably in dht_layout_anomalies()
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
*/
if (layout->list[i].err > 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "path=%s err=%s on subvol=%s",
- loc->path, strerror (layout->list[i].err),
- (layout->list[i].xlator ?
- layout->list[i].xlator->name : "<>"));
- if (layout->list[i].err == ENOENT)
- ret = 1;
+ gf_log_callingfn (this->name, GF_LOG_DEBUG,
+ "path=%s err=%s on subvol=%s",
+ loc->path,
+ strerror (layout->list[i].err),
+ (layout->list[i].xlator ?
+ layout->list[i].xlator->name
+ : "<>"));
+ if ((layout->list[i].err == ENOENT) && (ret >= 0)) {
+ ret++;
+ }
}
}
+
out:
return ret;
}
+int
+dht_dir_has_layout (dict_t *xattr, char *name)
+{
+
+ void *disk_layout_raw = NULL;
+
+ return dict_get_ptr (xattr, name, &disk_layout_raw);
+}
int
dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
loc_t *loc, dict_t *xattr)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t disk_layout[4];
- void *disk_layout_raw = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ int32_t count = -1;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ dht_conf_t *conf = this->private;
for (idx = 0; idx < layout->cnt; idx++) {
@@ -649,7 +717,7 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
goto out;
}
- dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
+ dict_ret = dict_get_ptr (xattr, conf->xattr_name,
&disk_layout_raw);
if (dict_ret < 0) {
@@ -665,7 +733,7 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
count = ntoh32 (disk_layout[0]);
if (count != 1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_ERROR,
"%s - disk layout has invalid count %d",
loc->path, count);
ret = -1;
@@ -714,7 +782,7 @@ dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
LOCK (&conf->layout_lock);
{
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ dht_inode_ctx_layout_set (inode, this, layout);
}
UNLOCK (&conf->layout_lock);
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 1c90c61d6..dbc9d0b3c 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -28,37 +19,106 @@
#include "compat.h"
#include "dht-common.h"
+int
+dht_linkfile_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_log (this->name, GF_LOG_WARNING, "got non-linkfile %s:%s",
+ prev->this->name, local->loc.path);
+out:
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ inode, stbuf, postparent, postparent,
+ xattr);
+ return 0;
+}
+#define is_equal(a, b) (a == b)
int
dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
local = frame->local;
+ if (!op_ret)
+ local->linked = _gf_true;
+
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new ();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set linkto key");
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_linkfile_lookup_cbk, subvol,
+ subvol->fops->lookup, &local->loc, xattrs);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+ }
+out:
local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent,
+ xdata);
+ if (xattrs)
+ dict_unref (xattrs);
return 0;
}
int
dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this,
xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
{
dht_local_t *local = NULL;
dict_t *dict = NULL;
int need_unref = 0;
int ret = 0;
+ dht_conf_t *conf = this->private;
local = frame->local;
local->linkfile.linkfile_cbk = linkfile_cbk;
local->linkfile.srcvol = tovol;
+ local->linked = _gf_false;
+
dict = local->params;
if (!dict) {
dict = dict_new ();
@@ -74,8 +134,12 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
"%s: gfid set failed", loc->path);
}
- ret = dict_set_str (dict, "trusted.glusterfs.dht.linkto",
- tovol->name);
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log ("dht-linkfile", GF_LOG_INFO,
+ "%s: internal-fop set failed", loc->path);
+
+ ret = dict_set_str (dict, conf->link_xattr_name, tovol->name);
if (ret < 0) {
gf_log (frame->this->name, GF_LOG_INFO,
@@ -84,9 +148,13 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
goto out;
}
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownership */
+ FRAME_SU_DO (frame, dht_local_t);
STACK_WIND (frame, dht_linkfile_create_cbk,
fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0, dict);
+ S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
if (need_unref && dict)
dict_unref (dict);
@@ -94,7 +162,7 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
return 0;
out:
local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
- loc->inode, NULL, NULL, NULL);
+ loc->inode, NULL, NULL, NULL, NULL);
if (need_unref && dict)
dict_unref (dict);
@@ -106,7 +174,8 @@ out:
int
dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -140,16 +209,17 @@ dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
goto err;
}
- unlink_local = dht_local_init (unlink_frame);
+ /* Using non-fop value here, as anyways, 'local->fop' is not used in
+ this particular case */
+ unlink_local = dht_local_init (unlink_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
if (!unlink_local) {
goto err;
}
- loc_copy (&unlink_local->loc, loc);
-
STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
subvol, subvol->fops->unlink,
- &unlink_local->loc);
+ &unlink_local->loc, 0, NULL);
return 0;
err:
@@ -174,7 +244,7 @@ dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
if (!xattr)
goto out;
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+ ret = dict_get_ptr (xattr, conf->link_xattr_name, &volname);
if ((-1 == ret) || !volname)
goto out;
@@ -189,3 +259,70 @@ dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
out:
return subvol;
}
+
+int
+dht_linkfile_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret)
+ gf_log (this->name, GF_LOG_ERROR, "setattr of uid/gid on %s"
+ " :<gfid:%s> failed (%s)",
+ (loc->path? loc->path: "NULL"),
+ uuid_utoa(local->gfid), strerror(op_errno));
+
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this)
+{
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {0,};
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
+
+ uuid_copy (local->loc.gfid, local->stbuf.ia_gfid);
+
+ copy = copy_frame (frame);
+
+ if (!copy)
+ goto out;
+
+ copy_local = dht_local_init (copy, &local->loc, NULL, 0);
+
+ if (!copy_local)
+ goto out;
+
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
+
+ copy->local = copy_local;
+
+ FRAME_SU_DO (copy, dht_local_t);
+
+ STACK_WIND (copy, dht_linkfile_setattr_cbk, subvol,
+ subvol->fops->setattr, &copy_local->loc,
+ &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 21fb5a7ca..e893eb48f 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -28,7 +19,6 @@ enum gf_dht_mem_types_ {
gf_dht_mt_dht_conf_t,
gf_dht_mt_char,
gf_dht_mt_int32_t,
- gf_dht_mt_dht_local_t,
gf_dht_mt_xlator_t,
gf_dht_mt_dht_layout_t,
gf_switch_mt_dht_conf_t,
@@ -37,6 +27,9 @@ enum gf_dht_mem_types_ {
gf_switch_mt_switch_struct,
gf_dht_mt_subvol_time,
gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_ctx_stat_time_t,
gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 745be4ce5..bcb19f23e 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -24,11 +15,12 @@
#endif
#include "dht-common.h"
+#include "xlator.h"
+#include <fnmatch.h>
#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
#define DHT_REBALANCE_BLKSIZE (128 * 1024)
-#define DHT_MIGRATE_EVEN_IF_LINK_EXISTS 1
static int
dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
@@ -60,9 +52,14 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
ret = syncop_write (to, fd, (buf + tmp_offset),
(start_idx - tmp_offset),
(offset + tmp_offset),
- iobref);
- if (ret < 0)
+ iobref, 0);
+ /* 'path' will be logged in calling function */
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to write (%s)",
+ strerror (errno));
goto out;
+ }
write_needed = 0;
}
@@ -73,9 +70,14 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
/* This means, last chunk is not yet written.. write it */
ret = syncop_write (to, fd, (buf + tmp_offset),
(buf_len - tmp_offset),
- (offset + tmp_offset), iobref);
- if (ret < 0)
+ (offset + tmp_offset), iobref, 0);
+ if (ret < 0) {
+ /* 'path' will be logged in calling function */
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to write (%s)",
+ strerror (errno));
goto out;
+ }
}
size_pending = (size - buf_len);
@@ -83,52 +85,156 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
break;
}
- /* do it regardless of all the above cases as we had to 'write' the
- given number of bytes */
- ret = syncop_ftruncate (to, fd, offset + size);
+ ret = size;
+out:
+ return ret;
+
+}
+
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf)
+{
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this, out);
+ GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (uuid_is_null (loc->pargfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->gfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get cached subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get hashed subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
+ cached_subvol->name, hashed_subvol->name);
+ data = dict_get (xattrs, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str (xattrs, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "linkto xattr in dict for %s", loc->name);
+ goto out;
+ }
+
+ ret = syncop_setxattr (cached_subvol, loc, xattrs, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr "
+ "failed %s -> %s (%s)", cached_subvol->name,
+ loc->name, strerror (errno));
+ goto out;
+ }
+ goto out;
+ } else {
+ linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
+ if (!linkto_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "linkto subvol for %s", loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
+ }
+
+ ret = syncop_link (hashed_subvol, loc, loc);
+ if (ret) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s"
+ " failed on subvol %s (%s)", loc->name,
+ uuid_utoa(loc->gfid),
+ hashed_subvol->name, strerror (op_errno));
+ if (op_errno != EEXIST)
+ goto out;
+ }
+ }
+ ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to perform truncate on %s", to->name);
+ gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)"
+ , loc->name, hashed_subvol->name, strerror (errno));
goto out;
}
- ret = size;
+ if (iatt.ia_nlink == stbuf->ia_nlink) {
+ ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
out:
return ret;
-
}
+
static inline int
-__is_file_migratable (xlator_t *this, loc_t *loc, dict_t *rsp_dict,
- struct iatt *stbuf)
+__is_file_migratable (xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, dict_t *xattrs, int flags)
{
- int ret = -1;
- int open_fd_count = 0;
+ int ret = -1;
- if (!IA_ISREG (stbuf->ia_type)) {
+ if (IA_ISDIR (stbuf->ia_type)) {
gf_log (this->name, GF_LOG_WARNING,
- "%s: migrate-file called on non-regular entry (0%o)",
- loc->path, stbuf->ia_type);
+ "%s: migrate-file called on directory", loc->path);
ret = -1;
goto out;
}
- if (stbuf->ia_nlink > 1) {
- /* TODO : support migrating hardlinks */
- gf_log (this->name, GF_LOG_WARNING, "%s: file has hardlinks",
- loc->path);
- ret = -ENOTSUP;
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
goto out;
}
-
- ret = dict_get_int32 (rsp_dict, GLUSTERFS_OPEN_FD_COUNT, &open_fd_count);
- if (!ret && (open_fd_count > 0)) {
- /* TODO: support migration of files with open fds */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: file has open fds, not attempting migration",
- loc->path);
+ if (stbuf->ia_nlink > 1) {
+ /* support for decommission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ ret = gf_defrag_handle_hardlink (this, loc,
+ xattrs, stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: file has hardlinks", loc->path);
+ }
+ ret = ENOTSUP;
goto out;
}
+
ret = 0;
out:
@@ -136,16 +242,17 @@ out:
}
static inline int
-__dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf,
- dict_t *dict, fd_t **dst_fd, int *need_unlink)
+__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
+ dict_t *dict, fd_t **dst_fd, dict_t *xattr)
{
- xlator_t *this = NULL;
- int ret = -1;
- mode_t mode = 0;
- fd_t *fd = NULL;
- struct iatt new_stbuf = {0,};
+ xlator_t *this = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {0,};
+ dht_conf_t *conf = NULL;
this = THIS;
+ conf = this->private;
ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
if (ret) {
@@ -154,49 +261,72 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf,
goto out;
}
+ ret = dict_set_str (dict, conf->link_xattr_name, from->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
fd = fd_create (loc->inode, DHT_REBALANCE_PID);
if (!fd) {
gf_log (this->name, GF_LOG_ERROR,
- "%s: fd create failed (destination)", loc->path);
+ "%s: fd create failed (destination) (%s)",
+ loc->path, strerror (errno));
ret = -1;
goto out;
}
ret = syncop_lookup (to, loc, NULL, &new_stbuf, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "failed to lookup %s on %s",
- loc->path, to->name);
-
- mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
- ret = syncop_create (to, loc, O_WRONLY, mode, fd, dict);
- if (ret < 0) {
+ if (!ret) {
+ /* File exists in the destination, check if gfid matches */
+ if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
gf_log (this->name, GF_LOG_ERROR,
- "failed to create %s on %s", loc->path, to->name);
+ "file %s exits in %s with different gfid",
+ loc->path, to->name);
+ fd_unref (fd);
goto out;
}
-
- *need_unlink = 1;
- goto done;
}
-
- /* File exits in the destination, just do the open if gfid matches */
- if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "file %s exits in %s with different gfid",
- loc->path, to->name);
- fd_unref (fd);
+ if ((ret == -1) && (errno != ENOENT)) {
+ /* File exists in destination, but not accessible */
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "%s: failed to lookup file (%s)",
+ loc->path, strerror (errno));
goto out;
}
- ret = syncop_open (to, loc, O_WRONLY, fd);
+ /* Create the destination with LINKFILE mode, and linkto xattr,
+ if the linkfile already exists, it will just open the file */
+ ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
+ dict, &new_stbuf);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s on %s",
- loc->path, to->name);
- fd_unref (fd);
+ "failed to create %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
goto out;
}
-done:
+
+ ret = syncop_fsetxattr (to, fd, xattr, 0);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set xattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ ret = syncop_ftruncate (to, fd, stbuf->ia_size);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "ftruncate failed for %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ ret = syncop_fsetattr (to, fd, stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "chown failed for %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
if (dst_fd)
*dst_fd = fd;
@@ -209,41 +339,75 @@ out:
static inline int
__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
- struct iatt *stbuf)
+ struct iatt *stbuf, int flag)
{
struct statvfs src_statfs = {0,};
struct statvfs dst_statfs = {0,};
int ret = -1;
xlator_t *this = NULL;
+ uint64_t src_statfs_blocks = 1;
+ uint64_t dst_statfs_blocks = 1;
+
this = THIS;
ret = syncop_statfs (from, loc, &src_statfs);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
- "failed to get statfs of %s on %s",
- loc->path, from->name);
+ "failed to get statfs of %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
goto out;
}
ret = syncop_statfs (to, loc, &dst_statfs);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
- "failed to get statfs of %s on %s",
- loc->path, to->name);
+ "failed to get statfs of %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
goto out;
}
- if (((dst_statfs.f_bavail *
- dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) >
- (((src_statfs.f_bavail * src_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE) - stbuf->ia_blocks)) {
- gf_log (this->name, GF_LOG_WARNING,
- "data movement attempted from node (%s) with"
- " higher disk space to a node (%s) with "
- "lesser disk space (%s)", from->name,
- to->name, loc->path);
- ret = -1;
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
+ /* Check:
+ During rebalance `migrate-data` - Destination subvol experiences
+ a `reduction` in 'blocks' of free space, at the same time source
+ subvol gains certain 'blocks' of free space. A valid check is
+ necessary here to avoid erroneous move to destination where
+ the space could be scantily available.
+ */
+ if (stbuf) {
+ dst_statfs_blocks = ((dst_statfs.f_bavail *
+ dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ src_statfs_blocks = ((src_statfs.f_bavail *
+ src_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ if ((dst_statfs_blocks - stbuf->ia_blocks) <
+ (src_statfs_blocks + stbuf->ia_blocks)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "data movement attempted from node (%s) with"
+ " higher disk space to a node (%s) with "
+ "lesser disk space (%s)", from->name,
+ to->name, loc->path);
+
+ /* this is not a 'failure', but we don't want to
+ consider this as 'success' too :-/ */
+ ret = 1;
+ goto out;
+ }
+ }
+check_avail_space:
+ if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "data movement attempted from node (%s) with "
+ "to node (%s) which does not have required free space"
+ " for %s", from->name, to->name, loc->path);
+ ret = 1;
goto out;
}
@@ -253,18 +417,23 @@ out:
}
static inline int
-__dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
- int hole_exists)
+__dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+ uint64_t ia_size, int hole_exists)
{
- int ret = -1;
+ int ret = 0;
int count = 0;
off_t offset = 0;
struct iovec *vector = NULL;
struct iobref *iobref = NULL;
-
- while (1) {
- ret = syncop_readv (from, src, DHT_REBALANCE_BLKSIZE,
- offset, &vector, &count, &iobref);
+ uint64_t total = 0;
+ size_t read_size = 0;
+
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
+ DHT_REBALANCE_BLKSIZE : (ia_size - total));
+ ret = syncop_readv (from, src, read_size,
+ offset, 0, &vector, &count, &iobref);
if (!ret || (ret < 0)) {
break;
}
@@ -274,14 +443,14 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
ret, offset, iobref);
else
ret = syncop_writev (to, dst, vector, count,
- offset, iobref);
+ offset, iobref, 0);
if (ret < 0) {
break;
}
offset += ret;
+ total += ret;
- if (vector)
- GF_FREE (vector);
+ GF_FREE (vector);
if (iobref)
iobref_unref (iobref);
iobref = NULL;
@@ -289,8 +458,7 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
}
if (iobref)
iobref_unref (iobref);
- if (vector)
- GF_FREE (vector);
+ GF_FREE (vector);
if (ret >= 0)
ret = 0;
@@ -298,6 +466,212 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
return ret;
}
+
+/*
+ * Open the source file of a migration and flag it as "being migrated".
+ *
+ * An fd is created on loc->inode and opened with O_RDWR on 'from'.  The
+ * source then gets the conf->link_xattr_name xattr set to to->name, and its
+ * mode is rewritten with the sgid and sticky bits set on top of the
+ * permissions found in 'stbuf'.
+ *
+ * Returns 0 on success, in which case the fd is handed to the caller through
+ * *src_fd.  Returns non-zero on failure; the fd is dropped here in that case
+ * because the caller never receives it and so could not release it.
+ */
+static inline int
+__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
+                               struct iatt *stbuf, fd_t **src_fd)
+{
+        int          ret  = 0;
+        fd_t        *fd   = NULL;
+        dict_t      *dict = NULL;
+        xlator_t    *this = NULL;
+        struct iatt  iatt = {0,};
+        dht_conf_t  *conf = NULL;
+
+        this = THIS;
+        conf = this->private;
+
+        fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+        if (!fd) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: fd create failed (source)", loc->path);
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_open (from, loc, O_RDWR, fd);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to open file %s on %s (%s)",
+                        loc->path, from->name, strerror (errno));
+                goto out;
+        }
+
+        /* a failed dict_new () must be reported as an error */
+        ret = -1;
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_set_str (dict, conf->link_xattr_name, to->name);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set xattr in dict for %s (linkto:%s)",
+                        loc->path, to->name);
+                goto out;
+        }
+
+        /* Once the migration starts, the source should have 'linkto' key set
+           to show which is the target, so other clients can work around it */
+        ret = syncop_setxattr (from, loc, dict, 0);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set xattr on %s in %s (%s)",
+                        loc->path, from->name, strerror (errno));
+                goto out;
+        }
+
+        /* mode should be (+S+T) to indicate migration is in progress */
+        iatt.ia_prot = stbuf->ia_prot;
+        iatt.ia_type = stbuf->ia_type;
+        iatt.ia_prot.sticky = 1;
+        iatt.ia_prot.sgid = 1;
+
+        ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set mode on %s in %s (%s)",
+                        loc->path, from->name, strerror (errno));
+                goto out;
+        }
+
+        if (src_fd)
+                *src_fd = fd;
+
+        /* success */
+        ret = 0;
+out:
+        if (dict)
+                dict_unref (dict);
+
+        /* The fd reaches the caller only on success and only through src_fd;
+           in every other case nobody else knows about it, so drop our
+           reference here instead of leaking it */
+        if (fd && (ret || !src_fd))
+                fd_unref (fd);
+
+        return ret;
+}
+
+/*
+ * Move a non-regular file (symlink, or any node created through mknod) from
+ * subvolume 'from' to subvolume 'to'.
+ *
+ * Steps:
+ *   - look the file up on 'to', asking for conf->link_xattr_name; an entry
+ *     that already exists there is accepted only if check_is_linkfile ()
+ *     says it is a linkfile, in which case it is unlinked first
+ *   - recreate the file on 'to' with the gfid of the source ("gfid-req"),
+ *     via readlink + symlink for symlinks and mknod for everything else
+ *   - copy uid/gid/mode from 'buf' (a failure here is only logged)
+ *   - unlink the file on 'from'
+ *
+ * Returns 0 on success and non-zero on failure (the result of the step that
+ * failed, or -1 when the destination holds a non-linkfile entry).
+ */
+int
+migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
+                       struct iatt *buf)
+{
+        int          ret      = -1;
+        dict_t      *rsp_dict = NULL;
+        dict_t      *dict     = NULL;
+        char        *link     = NULL;
+        struct iatt  stbuf    = {0,};
+        dht_conf_t  *conf     = this->private;
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: failed to set 'linkto' key in dict", loc->path);
+                goto out;
+        }
+
+        /* check in the destination if the file is link file */
+        ret = syncop_lookup (to, loc, dict, &stbuf, &rsp_dict, NULL);
+        if ((ret == -1) && (errno != ENOENT)) {
+                gf_log (this->name, GF_LOG_WARNING, "%s: lookup failed (%s)",
+                        loc->path, strerror (errno));
+                goto out;
+        }
+
+        /* we no more require this key */
+        dict_del (dict, conf->link_xattr_name);
+
+        /* file exists in target node, only if it is 'linkfile' its valid,
+           otherwise, error out */
+        if (!ret) {
+                if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict,
+                                        conf->link_xattr_name)) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s: file exists in destination", loc->path);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* as file is linkfile, delete it */
+                ret = syncop_unlink (to, loc);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s: failed to delete the linkfile (%s)",
+                                loc->path, strerror (errno));
+                        goto out;
+                }
+        }
+
+        /* Set the gfid of the source file in dict */
+        ret = dict_set_static_bin (dict, "gfid-req", buf->ia_gfid, 16);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: failed to set gfid in dict for create", loc->path);
+                goto out;
+        }
+
+        /* Create the file in target */
+        if (IA_ISLNK (buf->ia_type)) {
+                /* Handle symlinks separately */
+                ret = syncop_readlink (from, loc, &link, buf->ia_size);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s: readlink on symlink failed (%s)",
+                                loc->path, strerror (errno));
+                        goto out;
+                }
+
+                ret = syncop_symlink (to, loc, link, dict, 0);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s: creating symlink failed (%s)",
+                                loc->path, strerror (errno));
+                        goto out;
+                }
+
+                goto done;
+        }
+
+        ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
+                                                      buf->ia_type),
+                            makedev (ia_major (buf->ia_rdev),
+                                     ia_minor (buf->ia_rdev)), dict, 0);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)",
+                        loc->path, strerror (errno));
+                goto out;
+        }
+
+done:
+        /* best effort: the result is overwritten by the unlink below */
+        ret = syncop_setattr (to, loc, buf,
+                              (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+                               GF_SET_ATTR_MODE), NULL, NULL);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "%s: failed to perform setattr on %s (%s)",
+                        loc->path, to->name, strerror (errno));
+        }
+
+        ret = syncop_unlink (from, loc);
+        if (ret)
+                gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)",
+                        loc->path, strerror (errno));
+
+out:
+        /* release the link target filled in by syncop_readlink (); stays
+           NULL on every path that never read a symlink */
+        GF_FREE (link);
+
+        if (dict)
+                dict_unref (dict);
+
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        return ret;
+}
+
+/*
+ return values:
+
+ -1 : failure
+ 0 : successfully migrated data
+ 1 : not a failure, but we can't migrate data as of now
+*/
int
dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
int flag)
@@ -305,13 +679,15 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
int ret = -1;
struct iatt new_stbuf = {0,};
struct iatt stbuf = {0,};
+ struct iatt empty_iatt = {0,};
+ ia_prot_t src_ia_prot = {0,};
fd_t *src_fd = NULL;
fd_t *dst_fd = NULL;
dict_t *dict = NULL;
dict_t *xattr = NULL;
- dict_t *rsp_dict = NULL;
+ dict_t *xattr_rsp = NULL;
int file_has_holes = 0;
- int need_unlink = 0;
+ dht_conf_t *conf = this->private;
gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -320,112 +696,201 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (!dict)
goto out;
- ret = dict_set_int32 (dict, GLUSTERFS_OPEN_FD_COUNT, 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set fd-count key in dict, may attempt "
- "migration of file which has open fds", loc->path);
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
- ret = syncop_lookup (from, loc, dict, &stbuf, &rsp_dict, NULL);
+ /* Phase 1 - Data migration is in progress from now on */
+ ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s",
- loc->path, from->name);
+ gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)",
+ loc->path, from->name, strerror (errno));
goto out;
}
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
+ /* preserve source mode, so set the same to the destination */
+ src_ia_prot = stbuf.ia_prot;
+
/* Check if file can be migrated */
- ret = __is_file_migratable (this, loc, rsp_dict, &stbuf);
+ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
if (ret)
goto out;
- /* create the destination */
- ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd,
- &need_unlink);
- if (ret)
+ /* Take care of the special files */
+ if (!IA_ISREG (stbuf.ia_type)) {
+ /* Special files */
+ ret = migrate_special_files (this, from, to, loc, &stbuf);
goto out;
-
- /* Should happen on all files when 'force' option is not given */
- if (flag != DHT_MIGRATE_EVEN_IF_LINK_EXISTS) {
- ret = __dht_check_free_space (to, from, loc, &stbuf);
- if (ret)
- goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr (from, loc, &xattr);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get xattr from %s (%s)",
+ loc->path, from->name, strerror (errno));
- src_fd = fd_create (loc->inode, DHT_REBALANCE_PID);
- if (!src_fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: fd create failed (source)", loc->path);
- ret = -1;
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
+ dict, &dst_fd, xattr);
+ if (ret)
+ goto out;
+
+ ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
+ if (ret) {
goto out;
}
- ret = syncop_open (from, loc, O_RDONLY, src_fd);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s on %s",
+ /* Open the source, and also update mode/xattr */
+ ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open %s on %s",
loc->path, from->name);
goto out;
}
+
+ ret = syncop_fstat (from, src_fd, &stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
/* All I/O happens in this function */
- ret = __dht_rebalane_migrate_data (from, to, src_fd, dst_fd,
- file_has_holes);
+ ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data",
loc->path);
+ /* reset the destination back to 0 */
+ ret = syncop_ftruncate (to, dst_fd, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to reset target size back to 0 (%s)",
+ loc->path, strerror (errno));
+ }
+
+ ret = -1;
goto out;
}
- ret = syncop_lookup (from, loc, NULL, &new_stbuf, NULL, NULL);
+ /* TODO: Sync the locks */
+
+ ret = syncop_fsync (to, dst_fd, 0);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to fsync on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+
+ /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+
+ ret = syncop_fstat (from, src_fd, &new_stbuf);
if (ret < 0) {
/* Failed to get the stat info */
gf_log (this->name, GF_LOG_ERROR,
- "failed to lookup file %s on %s",
- loc->path, from->name);
- need_unlink = 0;
+ "failed to fstat file %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
goto out;
}
- /* No need to rebalance, if there is some
- activity on source file */
- if (new_stbuf.ia_mtime != stbuf.ia_mtime) {
+ /* source would have both sticky bit and sgid bit set, reset it to 0,
+ and set the source permission on destination, if it was not set
+ prior to setting rebalance-modes in source */
+ if (!src_ia_prot.sticky)
+ new_stbuf.ia_prot.sticky = 0;
+
+ if (!src_ia_prot.sgid)
+ new_stbuf.ia_prot.sgid = 0;
+
+ /* TODO: if the source actually had sticky bit, or sgid bit set,
+ we are not handling it */
+
+ ret = syncop_fsetattr (to, dst_fd, &new_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL);
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING,
- "%s: ignoring destination file as source has "
- "undergone some changes while migration was happening",
- loc->path);
- ret = -1;
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
goto out;
}
+ /* Because 'futimes' is not portable */
ret = syncop_setattr (to, loc, &new_stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_MODE | GF_SET_ATTR_ATIME |
- GF_SET_ATTR_MTIME), NULL, NULL);
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME),
+ NULL, NULL);
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform setattr on %s",
- loc->path, to->name);
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
}
- ret = syncop_listxattr (from, loc, &xattr);
- if (ret == -1)
+ /* Make the source as a linkfile first before deleting it */
+ empty_iatt.ia_prot.sticky = 1;
+ ret = syncop_fsetattr (from, src_fd, &empty_iatt,
+ GF_SET_ATTR_MODE, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, \
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate (from, src_fd, 0);
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get xattr from %s", loc->path, from->name);
+ "%s: failed to perform truncate on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ }
- ret = syncop_setxattr (to, loc, xattr, 0);
- if (ret == -1)
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ }
+
+ /* Do a stat and check the gfid before unlink */
+ ret = syncop_stat (from, loc, &empty_iatt);
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set xattr on %s", loc->path, to->name);
+ "%s: failed to do a stat on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
+ /* take out the source from namespace */
+ ret = syncop_unlink (from, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform unlink on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+ }
- /* rebalance complete */
- syncop_close (dst_fd);
- syncop_close (src_fd);
- syncop_unlink (from, loc);
- need_unlink = 0;
+ ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to lookup the file on subvolumes (%s)",
+ loc->path, strerror (errno));
+ }
gf_log (this->name, GF_LOG_INFO,
"completed migration of %s from subvolume %s to %s",
@@ -436,14 +901,15 @@ out:
if (dict)
dict_unref (dict);
- if (ret) {
- if (dst_fd)
- syncop_close (dst_fd);
- if (src_fd)
- syncop_close (src_fd);
- if (need_unlink)
- syncop_unlink (to, loc);
- }
+ if (xattr)
+ dict_unref (xattr);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+
+ if (dst_fd)
+ syncop_close (dst_fd);
+ if (src_fd)
+ syncop_close (src_fd);
return ret;
}
@@ -452,19 +918,17 @@ static int
rebalance_task (void *data)
{
int ret = -1;
- xlator_t *this = NULL;
dht_local_t *local = NULL;
call_frame_t *frame = NULL;
frame = data;
- this = THIS;
local = frame->local;
/* This function is 'synchrounous', hence if it returns,
we are done with the task */
- ret = dht_migrate_file (THIS, &local->loc, local->from_subvol,
- local->to_subvol, local->flags);
+ ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol,
+ local->rebalance.target_node, local->flags);
return ret;
}
@@ -477,6 +941,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
dht_layout_t *layout = 0;
xlator_t *this = NULL;
dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
this = THIS;
local = sync_frame->local;
@@ -490,16 +955,31 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
dht_layout_unref (this, layout);
}
- ret = dht_layout_preset (this, local->to_subvol,
+ ret = dht_layout_preset (this, local->rebalance.target_node,
local->loc.inode);
if (ret)
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to set inode ctx", local->loc.path);
}
- /* if success, errno is not checked,
- if ret is -1, then let errno be 'ENOTSUP' */
- DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, ENOTSUP);
+ if (op_ret == -1) {
+ /* Failure of migration process, mostly due to write process.
+ as we can't preserve the exact errno, lets say there was
+ no space to migrate-data
+ */
+ op_errno = ENOSPC;
+ }
+
+ if (op_ret == 1) {
+ /* migration didn't happen, but is not a failure, let the user
+ understand that he doesn't have permission to migrate the
+ file.
+ */
+ op_ret = -1;
+ op_errno = EPERM;
+ }
+
+ DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
return 0;
}
@@ -507,12 +987,829 @@ int
dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
{
int ret = -1;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
- ret = synctask_new (conf->env, rebalance_task,
+ ret = synctask_new (this->ctx->env, rebalance_task,
rebalance_task_completion,
frame, frame);
return ret;
}
+
+/*
+ * Remove the listener socket file named in the process command-line
+ * arguments (ctx->cmd_args.sock_file), if one is set.
+ *
+ * A socket file that is already gone (ENOENT) is not treated as an error.
+ * Returns 0 when the file was removed or there was nothing to remove;
+ * otherwise logs the failure and returns the non-zero result of unlink ().
+ */
+int
+gf_listener_stop (xlator_t *this)
+{
+        glusterfs_ctx_t *ctx  = this->ctx;
+        cmd_args_t      *args = NULL;
+        int              ret  = 0;
+
+        GF_ASSERT (ctx);
+        args = &ctx->cmd_args;
+
+        /* no socket file configured, nothing to clean up */
+        if (!args->sock_file)
+                return 0;
+
+        ret = unlink (args->sock_file);
+        if (!ret || (errno == ENOENT))
+                return 0;
+
+        gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
+                "socket %s, error: %s", args->sock_file,
+                strerror (errno));
+        return ret;
+}
+
+/*
+ * Create a new inode table for 'this' and store in *inode whatever
+ * inode_find () returns for the root gfid (all zero bytes except the last,
+ * which is 1).  *inode is left untouched when the table cannot be created.
+ */
+void
+dht_build_root_inode (xlator_t *this, inode_t **inode)
+{
+        uuid_t         root_gfid = {0, };
+        inode_table_t *table     = inode_table_new (0, this);
+
+        if (table) {
+                root_gfid[15] = 1;
+                *inode = inode_find (table, root_gfid);
+        }
+}
+
+/*
+ * Fill 'loc' so that it describes "/": path "/", the given inode marked as
+ * a directory, and a gfid of fifteen zero bytes followed by a single 1.
+ */
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc)
+{
+        /* gfid first: 00..00 01 */
+        memset (loc->gfid, 0, 16);
+        loc->gfid[15] = 1;
+
+        inode->ia_type = IA_IFDIR;
+        loc->inode = inode;
+        loc->path = "/";
+}
+
+
+/* Decide how the crawl reacts to a failed migrate-data of a single file.
+
+   op_errno : errno reported for the failed migration
+   defrag   : rebalance state; defrag_status is set to
+              GF_DEFRAG_STATUS_FAILED when the error is fatal
+
+   return values: 1 -> error, but ignore and continue
+                  0 -> proceed (accepted by callers, not produced here)
+                 -1 -> error, handle it */
+int32_t
+gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+{
+        /* if errno is neither ENOSPC nor ENOTCONN, we can still continue
+           with rebalance process.  Both comparisons have to hold, a value
+           always differs from at least one of the two constants */
+        if ((op_errno != ENOSPC) && (op_errno != ENOTCONN))
+                return 1;
+
+        /* ENOTCONN: most probably mount point went missing (mostly due
+           to a brick down), say rebalance failure to user, let him restart
+           it if everything is fine.
+           ENOSPC: rebalance process itself failed, may be remote brick went
+           down, or write failed due to disk full etc etc.. */
+        defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+        return -1;
+}
+
+/*
+ * Check a file against the defrag pattern list.
+ *
+ * The list is walked in order and only the first pattern whose path_pattern
+ * matches 'name' (fnmatch with FNM_NOESCAPE) is consulted: the result is
+ * _gf_true when 'size' is at least that pattern's size.  _gf_false is
+ * returned when nothing matches or when 'defrag' is NULL.
+ */
+static gf_boolean_t
+gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size)
+{
+        gf_defrag_pattern_list_t *pattern = NULL;
+        gf_boolean_t              matched = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("dht", defrag, out);
+
+        for (pattern = defrag->defrag_pattern; pattern;
+             pattern = pattern->next) {
+                if (fnmatch (pattern->path_pattern, name, FNM_NOESCAPE) != 0)
+                        continue;
+
+                /* first hit decides, later patterns are not looked at */
+                if (size >= pattern->size)
+                        matched = _gf_true;
+                break;
+        }
+
+ out:
+        return matched;
+}
+
+/* We do a depth first traversal of directories. But before we move into
+ * subdirs, we complete the data migration of those directories whose layouts
+ * have been fixed
+ */
+
+/*
+ * Request data migration for every non-directory entry directly under 'loc'.
+ *
+ * The directory is read with readdirp; for each entry that passes the
+ * optional pattern filter and whose node-uuid xattr equals
+ * defrag->node_uuid, 'migrate_data' is sent as a setxattr on the entry.
+ * Counters in 'defrag' (num_files_lookedup, skipped, total_failures,
+ * total_files, total_data) are updated along the way.
+ *
+ * Returns 0 once the whole directory has been processed, 1 when
+ * defrag->defrag_status is no longer GF_DEFRAG_STATUS_STARTED, and the
+ * non-zero result of the failing step otherwise.
+ */
+int
+gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+                        dict_t *migrate_data)
+{
+        int             ret             = -1;
+        loc_t           entry_loc       = {0,};
+        fd_t           *fd              = NULL;
+        gf_dirent_t     entries;
+        gf_dirent_t    *tmp             = NULL;
+        gf_dirent_t    *entry           = NULL;
+        gf_boolean_t    free_entries    = _gf_false;
+        off_t           offset          = 0;
+        dict_t         *dict            = NULL;
+        struct iatt     iatt            = {0,};
+        int32_t         op_errno        = 0;
+        char           *uuid_str        = NULL;
+        uuid_t          node_uuid       = {0,};
+        int             readdir_operrno = 0;
+        struct timeval  dir_start       = {0,};
+        struct timeval  end             = {0,};
+        double          elapsed         = 0;
+        struct timeval  start           = {0,};
+
+        gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
+                loc->path);
+        gettimeofday (&dir_start, NULL);
+
+        fd = fd_create (loc->inode, defrag->pid);
+        if (!fd) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+                goto out;
+        }
+
+        ret = syncop_opendir (this, loc, fd);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+                        loc->path);
+                goto out;
+        }
+
+        INIT_LIST_HEAD (&entries.list);
+
+        while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+                                       &entries)) != 0) {
+
+                /* Save errno before anything else runs: it is reported when
+                   readdirp failed, and ENOENT means there is no need to send
+                   more readdirp() */
+                readdir_operrno = errno;
+
+                if (ret < 0) {
+
+                        gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s."
+                                " Aborting migrate-data",
+                                strerror(readdir_operrno));
+                        goto out;
+                }
+
+                if (list_empty (&entries.list))
+                        break;
+
+                free_entries = _gf_true;
+
+                list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+                        if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+                                ret = 1;
+                                goto out;
+                        }
+
+                        offset = entry->d_off;
+
+                        if (!strcmp (entry->d_name, ".") ||
+                            !strcmp (entry->d_name, ".."))
+                                continue;
+
+                        if (IA_ISDIR (entry->d_stat.ia_type))
+                                continue;
+
+                        defrag->num_files_lookedup++;
+                        if (defrag->stats == _gf_true) {
+                                gettimeofday (&start, NULL);
+                        }
+                        if (defrag->defrag_pattern &&
+                            (gf_defrag_pattern_match (defrag, entry->d_name,
+                                                      entry->d_stat.ia_size)
+                             == _gf_false)) {
+                                continue;
+                        }
+                        loc_wipe (&entry_loc);
+                        ret = dht_build_child_loc (this, &entry_loc, loc,
+                                                   entry->d_name);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR, "Child loc"
+                                        " build failed");
+                                goto out;
+                        }
+
+                        if (uuid_is_null (entry->d_stat.ia_gfid)) {
+                                gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+                                        " gfid not present", loc->path,
+                                        entry->d_name);
+                                continue;
+                        }
+
+                        uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+                        if (uuid_is_null (loc->gfid)) {
+                                gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+                                        " gfid not present", loc->path,
+                                        entry->d_name);
+                                continue;
+                        }
+
+                        uuid_copy (entry_loc.pargfid, loc->gfid);
+
+                        entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+                        ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+                                             NULL, NULL);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR, "%s"
+                                        " lookup failed", entry_loc.path);
+                                continue;
+                        }
+
+                        /* drop the dict left over from a previous getxattr
+                           so that the pointer is not overwritten while we
+                           still hold a reference on it */
+                        if (dict) {
+                                dict_unref (dict);
+                                dict = NULL;
+                        }
+
+                        ret = syncop_getxattr (this, &entry_loc, &dict,
+                                               GF_XATTR_NODE_UUID_KEY);
+                        if(ret < 0) {
+                                gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                                        "get node-uuid for %s", entry_loc.path);
+                                continue;
+                        }
+
+                        ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
+                                            &uuid_str);
+                        if(ret < 0) {
+                                gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                                        "get node-uuid from dict for %s",
+                                        entry_loc.path);
+                                continue;
+                        }
+
+                        if (uuid_parse (uuid_str, node_uuid)) {
+                                gf_log (this->name, GF_LOG_ERROR, "uuid_parse "
+                                        "failed for %s", entry_loc.path);
+                                continue;
+                        }
+
+                        /* if file belongs to different node, skip migration
+                         * the other node will take responsibility of migration
+                         */
+                        if (uuid_compare (node_uuid, defrag->node_uuid)) {
+                                gf_log (this->name, GF_LOG_TRACE, "%s does not"
+                                        "belong to this node", entry_loc.path);
+                                continue;
+                        }
+
+                        /* uuid_str points into 'dict', which is released
+                           right below */
+                        uuid_str = NULL;
+
+                        dict_unref (dict);
+                        dict = NULL;
+
+                        /* if distribute is present, it will honor this key.
+                         * -1 is returned if distribute is not present or file
+                         * doesn't have a link-file. If file has link-file, the
+                         * path of link-file will be the value, and also that
+                         * guarantees that file has to be mostly migrated */
+
+                        ret = syncop_getxattr (this, &entry_loc, &dict,
+                                               GF_XATTR_LINKINFO_KEY);
+                        if (ret < 0) {
+                                gf_log (this->name, GF_LOG_TRACE, "failed to "
+                                        "get link-to key for %s",
+                                        entry_loc.path);
+                                continue;
+                        }
+
+                        ret = syncop_setxattr (this, &entry_loc, migrate_data,
+                                               0);
+                        if (ret) {
+                                /* errno is overloaded. See
+                                 * rebalance_task_completion ().  Save it
+                                 * right after the call, before any later
+                                 * call can overwrite it */
+                                op_errno = errno;
+                                if (op_errno != ENOSPC) {
+                                        gf_log (this->name, GF_LOG_DEBUG,
+                                                "migrate-data skipped for %s"
+                                                " due to space constraints",
+                                                entry_loc.path);
+                                        defrag->skipped +=1;
+                                } else{
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "migrate-data failed for %s",
+                                                entry_loc.path);
+                                        defrag->total_failures +=1;
+                                }
+                        }
+
+                        if (ret == -1) {
+                                ret = gf_defrag_handle_migrate_error (op_errno,
+                                                                      defrag);
+
+                                if (!ret)
+                                        gf_log (this->name, GF_LOG_DEBUG,
+                                                "migrate-data on %s failed: %s",
+                                                entry_loc.path,
+                                                strerror (op_errno));
+                                else if (ret == 1)
+                                        continue;
+                                else if (ret == -1)
+                                        goto out;
+                        }
+
+                        LOCK (&defrag->lock);
+                        {
+                                defrag->total_files += 1;
+                                defrag->total_data += iatt.ia_size;
+                        }
+                        UNLOCK (&defrag->lock);
+                        if (defrag->stats == _gf_true) {
+                                gettimeofday (&end, NULL);
+                                elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
+                                          (end.tv_usec - start.tv_usec);
+                                gf_log (this->name, GF_LOG_INFO, "Migration of "
+                                        "file:%s size:%"PRIu64" bytes took %.2f"
+                                        "secs", entry_loc.path, iatt.ia_size,
+                                        elapsed/1e6);
+                        }
+                }
+
+                gf_dirent_free (&entries);
+                free_entries = _gf_false;
+                INIT_LIST_HEAD (&entries.list);
+
+                if (readdir_operrno == ENOENT)
+                        break;
+        }
+
+        gettimeofday (&end, NULL);
+        elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
+                  (end.tv_usec - dir_start.tv_usec);
+        gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took "
+                "%.2f secs", loc->path, elapsed/1e6);
+        ret = 0;
+out:
+        if (free_entries)
+                gf_dirent_free (&entries);
+
+        loc_wipe (&entry_loc);
+
+        if (dict)
+                dict_unref(dict);
+
+        if (fd)
+                fd_unref (fd);
+        return ret;
+
+}
+
+
+/*
+ * Walk the directory tree below 'loc' depth first.
+ *
+ * For 'loc' itself: look it up and, unless the command is
+ * GF_DEFRAG_CMD_START_LAYOUT_FIX, run gf_defrag_migrate_data () on it.
+ * For every sub-directory found through readdirp: send the 'fix_layout'
+ * dict as a setxattr on it, then recurse into it.
+ *
+ * Returns 0 when the whole subtree was processed, 1 when
+ * defrag->defrag_status is no longer GF_DEFRAG_STATUS_STARTED, and a
+ * non-zero value from the failing step otherwise.
+ */
+int
+gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ /* NOTE(review): 'dict' is never assigned in this function, it is only
+    checked and unref'd at 'out' */
+ dict_t *dict = NULL;
+ off_t offset = 0;
+ struct iatt iatt = {0,};
+ int readdirp_errno = 0;
+
+ ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
+ loc->path);
+ goto out;
+ }
+
+ /* migrate the files of this directory before descending into its
+    sub-directories; skipped for a layout-fix-only run */
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data);
+ if (ret)
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s"
+ ". Aborting fix-layout",strerror(errno));
+ goto out;
+ }
+
+ /* Need to keep track of ENOENT errno, that means, there is no
+ need to send more readdirp() */
+ readdirp_errno = errno;
+
+ if (list_empty (&entries.list))
+ break;
+
+ /* from here on 'entries' holds data that 'out' must free */
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ /* stop as soon as the rebalance is no longer in the
+    STARTED state */
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ /* remember where to resume the next readdirp */
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ /* only directories are handled here */
+ if (!IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ /* the parent gfid is required to fill pargfid below */
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ /* a failed fix-layout setxattr aborts the whole crawl and
+    marks the rebalance as failed */
+ ret = syncop_setxattr (this, &entry_loc, fix_layout,
+ 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setxattr "
+ "failed for %s", entry_loc.path);
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ defrag->total_failures ++;
+ goto out;
+ }
+ /* descend into the sub-directory; any non-zero result
+    (including 1 for "no longer started") ends this level too */
+ ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
+ fix_layout, migrate_data);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Fix layout "
+ "failed for %s", entry_loc.path);
+ defrag->total_failures++;
+ goto out;
+ }
+
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ if (readdirp_errno == ENOENT)
+ break;
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+
+}
+
+
+/*
+ * Synctask body of a rebalance run.
+ *
+ * 'data' is the dht xlator. The function records the start time, builds the
+ * root inode and root loc, looks up '/', sets the fix-layout xattr on it and
+ * then hands the tree to gf_defrag_fix_layout(). For any command other than
+ * GF_DEFRAG_CMD_START_LAYOUT_FIX a "distribute.migrate-data" dictionary
+ * ("force" or "non-force") is passed along as well.
+ *
+ * Once the defrag info is known, the exit path publishes the final status
+ * through ctx->notify (when a callback is registered), marks the defrag as
+ * exiting and frees it.
+ *
+ * Returns the result of the last step attempted; 0 means success.
+ */
+int
+gf_defrag_start_crawl (void *data)
+{
+        xlator_t         *this         = NULL;
+        dht_conf_t       *conf         = NULL;
+        gf_defrag_info_t *defrag       = NULL;
+        int               ret          = -1;
+        loc_t             loc          = {0,};
+        struct iatt       iatt         = {0,};
+        struct iatt       parent       = {0,};
+        dict_t           *fix_layout   = NULL;
+        dict_t           *migrate_data = NULL;
+        dict_t           *status       = NULL;
+        glusterfs_ctx_t  *ctx          = NULL;
+
+        this = data;
+        if (!this)
+                goto out;
+
+        ctx = this->ctx;
+        if (!ctx)
+                goto out;
+
+        conf = this->private;
+        if (!conf)
+                goto out;
+
+        defrag = conf->defrag;
+        if (!defrag)
+                goto out;
+
+        gettimeofday (&defrag->start_time, NULL);
+        dht_build_root_inode (this, &defrag->root_inode);
+        if (!defrag->root_inode)
+                goto out;
+
+        dht_build_root_loc (defrag->root_inode, &loc);
+
+        /* fix-layout on '/' first */
+
+        ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
+                goto out;
+        }
+
+        fix_layout = dict_new ();
+        if (!fix_layout) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
+                goto out;
+        }
+
+        ret = syncop_setxattr (this, &loc, fix_layout, 0);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
+                        loc.path);
+                defrag->total_failures++;
+                goto out;
+        }
+
+        /* Data migration is requested for every command except a pure
+         * layout fix. */
+        if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+                migrate_data = dict_new ();
+                if (!migrate_data) {
+                        ret = -1;
+                        goto out;
+                }
+                if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
+                        ret = dict_set_str (migrate_data,
+                                            "distribute.migrate-data", "force");
+                else
+                        ret = dict_set_str (migrate_data,
+                                            "distribute.migrate-data",
+                                            "non-force");
+                if (ret)
+                        goto out;
+        }
+        ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+                                    migrate_data);
+        if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+            (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+                defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+        }
+
+out:
+        /* The option dictionaries are only needed while the crawl runs;
+         * drop the references obtained from dict_new(). */
+        if (fix_layout)
+                dict_unref (fix_layout);
+        if (migrate_data)
+                dict_unref (migrate_data);
+
+        /* The early validation failures jump here before 'defrag' (and
+         * possibly 'ctx' and 'conf') has been resolved. There is nothing to
+         * lock, report or free in that case, and touching defrag->lock would
+         * dereference a NULL pointer. */
+        if (!defrag)
+                return ret;
+
+        LOCK (&defrag->lock);
+        {
+                status = dict_new ();
+                gf_defrag_status_get (defrag, status);
+                if (ctx->notify)
+                        ctx->notify (GF_EN_DEFRAG_STATUS, status);
+                if (status)
+                        dict_unref (status);
+                defrag->is_exiting = 1;
+        }
+        UNLOCK (&defrag->lock);
+
+        GF_FREE (defrag);
+        conf->defrag = NULL;
+
+        return ret;
+}
+
+
+/*
+ * Completion callback handed to synctask_new() by gf_defrag_start().
+ *
+ * Stops the listener of the xlator that owns the sync frame, destroys the
+ * frame's call stack and sends SIGTERM to the current process. 'ret' and
+ * 'data' are not used. Always returns 0.
+ */
+static int
+gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        xlator_t *owner = sync_frame->this;
+
+        /* Read the xlator out of the frame before the stack is destroyed. */
+        gf_listener_stop (owner);
+        STACK_DESTROY (sync_frame->root);
+
+        /* Ask our own process to terminate. */
+        kill (getpid(), SIGTERM);
+
+        return 0;
+}
+
+/*
+ * Kick off a rebalance for the dht xlator passed in 'data'.
+ *
+ * Creates a call frame tagged with GF_CLIENT_PID_DEFRAG, records that pid
+ * and the STARTED state in the defrag info, and schedules
+ * gf_defrag_start_crawl() as a synctask with gf_defrag_done() as its
+ * completion callback. Nothing is done when the xlator has no private
+ * configuration, no defrag info, or the frame cannot be created.
+ *
+ * Always returns NULL.
+ */
+void *
+gf_defrag_start (void *data)
+{
+        xlator_t         *this   = data;
+        dht_conf_t       *conf   = this->private;
+        gf_defrag_info_t *defrag = NULL;
+        call_frame_t     *frame  = NULL;
+        int               ret    = -1;
+
+        if (!conf)
+                return NULL;
+
+        defrag = conf->defrag;
+        if (!defrag)
+                return NULL;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                return NULL;
+
+        /* Tag the frame as rebalance traffic and mirror the pid and the
+         * new state into the defrag info. */
+        frame->root->pid = GF_CLIENT_PID_DEFRAG;
+        defrag->pid = frame->root->pid;
+        defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+        ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
+                            gf_defrag_done, frame, this);
+        if (ret)
+                gf_log (this->name, GF_LOG_ERROR, "Could not create"
+                        " task for rebalance");
+
+        return NULL;
+}
+
+/*
+ * Report the current rebalance counters.
+ *
+ * Nothing happens when 'defrag' is NULL or the rebalance has not started.
+ * Otherwise the counters (files, size, lookups, failures, skipped), the
+ * status and - when non-zero - the elapsed run time in seconds are stored
+ * in 'dict' if one is supplied, and a two-line summary is logged at INFO
+ * level. A failure to store an individual key is logged as a warning and
+ * does not stop the remaining keys from being set.
+ *
+ * Always returns 0.
+ */
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+{
+        struct timeval  now       = {0,};
+        double          elapsed   = 0;
+        uint64_t        migrated  = 0;
+        uint64_t        bytes     = 0;
+        uint64_t        looked_up = 0;
+        uint64_t        failed    = 0;
+        uint64_t        skipped   = 0;
+        char           *state     = "";
+        int             ret       = 0;
+
+        if (!defrag)
+                return 0;
+
+        if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+                return 0;
+
+        /* Snapshot the counters once so the dict and the log agree. */
+        migrated  = defrag->total_files;
+        bytes     = defrag->total_data;
+        looked_up = defrag->num_files_lookedup;
+        failed    = defrag->total_failures;
+        skipped   = defrag->skipped;
+
+        gettimeofday (&now, NULL);
+        elapsed = now.tv_sec - defrag->start_time.tv_sec;
+
+        /* Without a dictionary only the log summary below is produced. */
+        if (dict) {
+                ret = dict_set_uint64 (dict, "files", migrated);
+                if (ret)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set file count");
+
+                ret = dict_set_uint64 (dict, "size", bytes);
+                if (ret)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set size of xfer");
+
+                ret = dict_set_uint64 (dict, "lookups", looked_up);
+                if (ret)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set lookedup file count");
+
+                ret = dict_set_int32 (dict, "status", defrag->defrag_status);
+                if (ret)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set status");
+
+                if (elapsed) {
+                        ret = dict_set_double (dict, "run-time", elapsed);
+                        if (ret)
+                                gf_log (THIS->name, GF_LOG_WARNING,
+                                        "failed to set run-time");
+                }
+
+                ret = dict_set_uint64 (dict, "failures", failed);
+                if (ret)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set failure count");
+
+                ret = dict_set_uint64 (dict, "skipped", skipped);
+                if (ret)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set skipped file count");
+        }
+
+        /* Human readable name of the state for the log line; an unknown
+         * state is logged as an empty string. */
+        switch (defrag->defrag_status) {
+        case GF_DEFRAG_STATUS_NOT_STARTED:
+                state = "not started";
+                break;
+        case GF_DEFRAG_STATUS_STARTED:
+                state = "in progress";
+                break;
+        case GF_DEFRAG_STATUS_STOPPED:
+                state = "stopped";
+                break;
+        case GF_DEFRAG_STATUS_COMPLETE:
+                state = "completed";
+                break;
+        case GF_DEFRAG_STATUS_FAILED:
+                state = "failed";
+                break;
+        default:
+                break;
+        }
+
+        gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f "
+                "secs", state, elapsed);
+        gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
+                PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: "
+                "%"PRIu64, migrated, bytes, looked_up, failed, skipped);
+
+        return 0;
+}
+
+/*
+ * Handle a "stop" request for a rebalance.
+ *
+ * When the rebalance has been started its status is switched to
+ * GF_DEFRAG_STATUS_STOPPED and, if 'output' is supplied, the current
+ * counters are written into it through gf_defrag_status_get().
+ *
+ * Returns 0 when the stop was recorded, -1 when the rebalance had not been
+ * started.
+ */
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output)
+{
+        /* TODO: set a variable 'stop_defrag' here, it should be checked
+           in defrag loop */
+        int ret = -1;
+
+        GF_ASSERT (defrag);
+
+        if (defrag->defrag_status != GF_DEFRAG_STATUS_NOT_STARTED) {
+                gf_log ("", GF_LOG_INFO, "Received stop command on rebalance");
+                defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+
+                if (output)
+                        gf_defrag_status_get (defrag, output);
+
+                ret = 0;
+        }
+
+        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 538cfac95..5d6f4f232 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
@@ -35,7 +26,8 @@ int
dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -80,10 +72,11 @@ unwind:
WIPE (&local->preparent);
WIPE (&local->postparent);
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, xdata);
}
return 0;
@@ -96,7 +89,7 @@ dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *preoldparent,
struct iatt *postoldparent,
struct iatt *prenewparent,
- struct iatt *postnewparent)
+ struct iatt *postnewparent, dict_t *xdata)
{
dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
@@ -146,7 +139,7 @@ dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, dht_rename_dir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
if (!--call_cnt)
break;
}
@@ -159,10 +152,11 @@ unwind:
WIPE (&local->preparent);
WIPE (&local->postparent);
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -172,9 +166,7 @@ int
dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- conf = this->private;
local = frame->local;
if (local->op_ret == -1)
@@ -185,19 +177,20 @@ dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
STACK_WIND (frame, dht_rename_hashed_dir_cbk,
local->dst_hashed,
local->dst_hashed->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
return 0;
err:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
int
dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -226,7 +219,7 @@ dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -246,7 +239,7 @@ dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, dht_rename_readdir_cbk,
prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0);
+ local->fd, 4096, 0, NULL);
return 0;
@@ -302,22 +295,54 @@ dht_rename_dir (call_frame_t *frame, xlator_t *this)
STACK_WIND (frame, dht_rename_opendir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd);
+ &local->loc2, local->fd, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
+/*
+ * Tag the dictionary 'xattr' with GLUSTERFS_INTERNAL_FOP_KEY = "yes".
+ *
+ * 'xattr' must be a modifiable dict_t pointer variable: when it is NULL a
+ * new dictionary is allocated into it, and the macro gives up silently if
+ * that allocation fails. A failure to set the key is only logged.
+ *
+ * The expansion reads 'this' and 'local' from the calling scope.
+ */
+#define DHT_MARK_FOP_INTERNAL(xattr) do {                               \
+                int set_ret = -1;                                       \
+                if (!xattr)                                             \
+                        xattr = dict_new ();                            \
+                if (!xattr)                                             \
+                        break;                                          \
+                set_ret = dict_set_str (xattr,                          \
+                                        GLUSTERFS_INTERNAL_FOP_KEY,     \
+                                        "yes");                         \
+                if (set_ret) {                                          \
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to set" \
+                                " internal dict key for %s",            \
+                                local->loc.path);                       \
+                }                                                       \
+        } while (0)
+/*
+ * Final step of a rename: unwind the rename call to the caller.
+ *
+ * If the 'linked' flag is set on the frame's local it is cleared and
+ * dht_linkfile_attr_heal() is invoked first. The phase-1 flags are then
+ * stripped from the result iatt before the stored op_ret/op_errno and the
+ * collected parent attributes are unwound. Always returns 0.
+ */
+int
+dht_rename_done (call_frame_t *frame, xlator_t *this)
+{
+        dht_local_t *local = frame->local;
+
+        if (local->linked == _gf_true) {
+                /* Clear the flag before healing so it is acted on once. */
+                local->linked = _gf_false;
+                dht_linkfile_attr_heal (frame, this);
+        }
+
+        DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+
+        DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+                          &local->stbuf, &local->preoldparent,
+                          &local->postoldparent, &local->preparent,
+                          &local->postparent, NULL);
+
+        return 0;
+}
int
dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -346,10 +371,7 @@ dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
WIPE (&local->postparent);
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ dht_rename_done (frame, this);
}
out:
@@ -367,7 +389,7 @@ dht_rename_cleanup (call_frame_t *frame)
xlator_t *dst_hashed = NULL;
xlator_t *dst_cached = NULL;
int call_cnt = 0;
-
+ dict_t *xattr = NULL;
local = frame->local;
this = frame->this;
@@ -391,13 +413,15 @@ dht_rename_cleanup (call_frame_t *frame)
if (!call_cnt)
goto nolinks;
+ DHT_MARK_FOP_INTERNAL (xattr);
+
if (dst_hashed != src_hashed && dst_hashed != src_cached) {
gf_log (this->name, GF_LOG_TRACE,
"unlinking linkfile %s @ %s => %s",
local->loc.path, dst_hashed->name, src_cached->name);
STACK_WIND (frame, dht_rename_unlink_cbk,
dst_hashed, dst_hashed->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr);
}
if (src_cached != dst_hashed) {
@@ -406,9 +430,12 @@ dht_rename_cleanup (call_frame_t *frame)
local->loc2.path, src_cached->name);
STACK_WIND (frame, dht_rename_unlink_cbk,
src_cached, src_cached->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr);
}
+ if (xattr)
+ dict_unref (xattr);
+
return 0;
nolinks:
@@ -417,10 +444,11 @@ nolinks:
WIPE (&local->preparent);
WIPE (&local->postparent);
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
return 0;
}
@@ -428,9 +456,10 @@ nolinks:
int
dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_frame_t *prev = NULL;
dht_local_t *local = NULL;
@@ -444,6 +473,10 @@ dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path, prev->this->name, strerror (op_errno));
}
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
DHT_STACK_DESTROY (frame);
return 0;
@@ -454,7 +487,8 @@ int
dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -465,6 +499,7 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t *rename_subvol = NULL;
call_frame_t *link_frame = NULL;
dht_local_t *link_local = NULL;
+ dict_t *xattr = NULL;
local = frame->local;
prev = cookie;
@@ -474,6 +509,8 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dst_hashed = local->dst_hashed;
dst_cached = local->dst_cached;
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO (frame, dht_local_t);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"%s: rename on %s failed (%s)", local->loc.path,
@@ -489,28 +526,39 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
- link_local = dht_local_init (link_frame);
+ /* fop value sent as maxvalue because it is not used
+ anywhere in this case */
+ link_local = dht_local_init (link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
if (!link_local) {
goto err;
}
- loc_copy (&link_local->loc, &local->loc2);
if (link_local->loc.inode)
inode_unref (link_local->loc.inode);
link_local->loc.inode = inode_ref (local->loc.inode);
uuid_copy (link_local->gfid, local->loc.inode->gfid);
- link_frame->local = link_local;
dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
- src_cached, dst_hashed, &link_local->loc);
+ this, src_cached, dst_hashed,
+ &link_local->loc);
}
err:
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent, prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent, prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent, prev->this);
+ /* Merge attrs only from src_cached. In case there of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev->this == src_cached) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+ }
+
/* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
* is called. since rename has already happened on rename_subvol,
@@ -535,6 +583,8 @@ err:
if (local->call_cnt == 0)
goto unwind;
+ DHT_MARK_FOP_INTERNAL (xattr);
+
if (src_cached != dst_hashed && src_cached != dst_cached) {
gf_log (this->name, GF_LOG_TRACE,
"deleting old src datafile %s @ %s",
@@ -542,7 +592,7 @@ err:
STACK_WIND (frame, dht_rename_unlink_cbk,
src_cached, src_cached->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr);
}
if (src_hashed != rename_subvol && src_hashed != src_cached) {
@@ -552,7 +602,7 @@ err:
STACK_WIND (frame, dht_rename_unlink_cbk,
src_hashed, src_hashed->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr);
}
if (dst_cached
@@ -564,8 +614,10 @@ err:
STACK_WIND (frame, dht_rename_unlink_cbk,
dst_cached, dst_cached->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr);
}
+ if (xattr)
+ dict_unref (xattr);
return 0;
unwind:
@@ -573,15 +625,16 @@ unwind:
WIPE (&local->postoldparent);
WIPE (&local->preparent);
WIPE (&local->postparent);
+ if (xattr)
+ dict_unref (xattr);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ dht_rename_done (frame, this);
return 0;
cleanup:
+ if (xattr)
+ dict_unref (xattr);
dht_rename_cleanup (frame);
return 0;
@@ -615,9 +668,11 @@ dht_do_rename (call_frame_t *frame)
"renaming %s => %s (%s)",
local->loc.path, local->loc2.path, rename_subvol->name);
+ if (local->linked == _gf_true)
+ FRAME_SU_DO (frame, dht_local_t);
STACK_WIND (frame, dht_rename_cbk,
rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
return 0;
}
@@ -627,7 +682,8 @@ int
dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -642,7 +698,11 @@ dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"link/file on %s failed (%s)",
prev->this->name, strerror (op_errno));
local->op_ret = -1;
- local->op_errno = op_errno;
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ } else if (local->src_cached == prev->this) {
+ /* merge of attr returned only from linkfile creation */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
}
this_call_cnt = dht_frame_return (frame);
@@ -665,7 +725,8 @@ cleanup:
int
dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -707,6 +768,7 @@ dht_rename_create_links (call_frame_t *frame)
xlator_t *dst_hashed = NULL;
xlator_t *dst_cached = NULL;
int call_cnt = 0;
+ dict_t *xattr = NULL;
local = frame->local;
@@ -717,6 +779,7 @@ dht_rename_create_links (call_frame_t *frame)
dst_hashed = local->dst_hashed;
dst_cached = local->dst_cached;
+ DHT_MARK_FOP_INTERNAL (xattr);
if (src_cached == dst_cached) {
if (dst_hashed == dst_cached)
@@ -728,7 +791,7 @@ dht_rename_create_links (call_frame_t *frame)
STACK_WIND (frame, dht_rename_unlink_links_cbk,
dst_hashed, dst_hashed->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr);
return 0;
}
@@ -745,7 +808,7 @@ dht_rename_create_links (call_frame_t *frame)
"linkfile %s @ %s => %s",
local->loc.path, dst_hashed->name, src_cached->name);
memcpy (local->gfid, local->loc.inode->gfid, 16);
- dht_linkfile_create (frame, dht_rename_links_cbk,
+ dht_linkfile_create (frame, dht_rename_links_cbk, this,
src_cached, dst_hashed, &local->loc);
}
@@ -755,7 +818,7 @@ dht_rename_create_links (call_frame_t *frame)
local->loc2.path, src_cached->name);
STACK_WIND (frame, dht_rename_links_cbk,
src_cached, src_cached->fops->link,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, xattr);
}
nolinks:
@@ -763,6 +826,8 @@ nolinks:
/* skip to next step */
dht_do_rename (frame);
}
+ if (xattr)
+ dict_unref (xattr);
return 0;
}
@@ -770,7 +835,7 @@ nolinks:
int
dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_t *src_cached = NULL;
xlator_t *src_hashed = NULL;
@@ -815,17 +880,14 @@ dht_rename (call_frame_t *frame, xlator_t *this,
if (newloc->inode)
dst_cached = dht_subvol_get_cached (this, newloc->inode);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);
if (!local) {
op_errno = ENOMEM;
goto err;
}
-
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
+ /* cached_subvol will be set from dht_local_init, reset it to NULL,
+ as the logic of handling rename is different */
+ local->cached_subvol = NULL;
ret = loc_copy (&local->loc2, newloc);
if (ret == -1) {
@@ -855,7 +917,8 @@ dht_rename (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index ca41fd81d..3fe96b1c7 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -26,7 +17,7 @@
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
-
+#include "glusterfs-acl.h"
#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,cnt,path) do { \
layout->list[i].start = srt; \
@@ -38,43 +29,40 @@
layout->list[i].xlator->name, path); \
} while (0)
+/*
+ * Zero the start/stop range of every entry in 'layout' (a pointer to a
+ * layout with a 'cnt' member and a 'list' array of that many entries).
+ *
+ * The argument is parenthesized and the loop index carries a
+ * macro-specific name, so an argument expression that is not a plain
+ * identifier, or that itself mentions a variable called 'cnt', expands
+ * correctly.
+ */
+#define DHT_RESET_LAYOUT_RANGE(layout) do {                             \
+                int reset_idx = 0;                                      \
+                for (reset_idx = 0; reset_idx < (layout)->cnt;          \
+                     reset_idx++) {                                     \
+                        (layout)->list[reset_idx].start = 0;            \
+                        (layout)->list[reset_idx].stop = 0;             \
+                }                                                       \
+        } while (0)
-static inline uint32_t
-dht_find_overlap (int idx, int cnk_idx, uint32_t start, uint32_t stop,
- uint32_t chunk_size)
+static uint32_t
+dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
{
- uint32_t overlap = 0;
- uint32_t chunk_begin = 0;
+ if (o >= old->cnt || n >= new->cnt)
+ return 0;
- chunk_begin = cnk_idx * chunk_size;
+ if (old->list[o].err > 0 || new->list[n].err > 0)
+ return 0;
- /* There is no chance of overlap */
- if ((chunk_begin > stop) ||
- ((chunk_begin + chunk_size) < start))
- goto out;
-
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) <= stop)) {
- overlap = ((chunk_begin + chunk_size) - start);
- goto out;
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
}
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - start);
- goto out;
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
}
- if ((chunk_begin < stop) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - chunk_begin);
- goto out;
- }
+ if ((old->list[o].start > new->list[n].stop) ||
+ (old->list[o].stop < new->list[n].start))
+ return 0;
-out:
- return overlap;
+ return min (old->list[o].stop, new->list[n].stop) -
+ max (old->list[o].start, new->list[n].start) + 1;
}
+
int
dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
{
@@ -82,7 +70,7 @@ dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
local = frame->local;
local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno);
+ local->op_errno, NULL);
return 0;
}
@@ -90,7 +78,7 @@ dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
int
dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -129,18 +117,32 @@ dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
{
xlator_t *subvol = NULL;
dict_t *xattr = NULL;
int ret = 0;
xlator_t *this = NULL;
int32_t *disk_layout = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
-
- subvol = layout->list[i].xlator;
+ local = frame->local;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
this = frame->this;
+ GF_VALIDATE_OR_GOTO ("", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, layout, err);
+ GF_VALIDATE_OR_GOTO (this->name, local, err);
+ GF_VALIDATE_OR_GOTO (this->name, subvol, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
xattr = get_new_dict ();
if (!xattr) {
goto err;
@@ -154,8 +156,7 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
goto err;
}
- ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
- disk_layout, 4 * 4);
+ ret = dict_set_bin (xattr, conf->xattr_name, disk_layout, 4 * 4);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"%s: (subvol %s) failed to set xattr dictionary",
@@ -171,9 +172,12 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
dict_ref (xattr);
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
+
STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
subvol, subvol->fops->setxattr,
- loc, xattr, 0);
+ loc, xattr, 0, NULL);
dict_unref (xattr);
@@ -183,11 +187,10 @@ err:
if (xattr)
dict_destroy (xattr);
- if (disk_layout)
- GF_FREE (disk_layout);
+ GF_FREE (disk_layout);
dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM);
+ -1, ENOMEM, NULL);
return 0;
}
@@ -198,21 +201,42 @@ dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
int i = 0;
int count = 0;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
gf_log (this->name, GF_LOG_DEBUG,
"writing the new range for all subvolumes");
- local->call_cnt = count = layout->cnt;
+ local->call_cnt = count = conf->subvolume_cnt;
for (i = 0; i < layout->cnt; i++) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
if (--count == 0)
- break;
+ goto out;
+ }
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
}
+
+ dht_layout_unref (this, dummy);
+out:
return 0;
}
@@ -223,14 +247,17 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
int missing_xattr = 0;
int i = 0;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err != -1 || !layout->list[i].stop) {
/* err != -1 would mean xattr present on the directory
- * or the directory is itself non existant.
+ * or the directory is non existent.
* !layout->list[i].stop would mean layout absent
*/
@@ -254,18 +281,30 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
if (layout->list[i].err != -1 || !layout->list[i].stop)
continue;
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
if (--missing_xattr == 0)
break;
}
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ }
+ }
+ dht_layout_unref (this, dummy);
+out:
return 0;
}
int
dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -306,6 +345,9 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
return 0;
}
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
+
local->call_cnt = missing_attr;
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == -1) {
@@ -316,7 +358,7 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
}
}
@@ -327,7 +369,8 @@ int
dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -373,6 +416,46 @@ out:
return 0;
}
+/*
+ * Copy the POSIX ACL xattrs (default and access) found in @xattr into @dict.
+ *
+ * The two ACLs are handled independently of each other: an absent ACL is
+ * only noted at DEBUG level, and a failure to store one in @dict is logged
+ * as a WARNING without stopping the other from being copied. The function
+ * reports nothing back to the caller.
+ */
+void
+dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+{
+        xlator_t *this  = NULL;
+        data_t   *value = NULL;
+
+        GF_ASSERT (xattr);
+        GF_ASSERT (dict);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* Default ACL. */
+        value = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+        if (value == NULL) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "ACL_DEFAULT xattr not present");
+        } else if (dict_set (dict, POSIX_ACL_DEFAULT_XATTR, value) != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not set ACL_DEFAULT xattr");
+        }
+
+        /* Access ACL. */
+        value = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
+        if (value == NULL) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "ACL_ACCESS xattr not present");
+        } else if (dict_set (dict, POSIX_ACL_ACCESS_XATTR, value) != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not set ACL_ACCESS xattr");
+        }
+}
int
dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
@@ -406,16 +489,19 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
if (ret)
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%s: failed to set gfid in dict", loc->path);
} else if (local->params) {
/* Send the dictionary from higher layers directly */
dict = dict_ref (local->params);
}
+ /* Set acls */
+ if (local->xattr && dict)
+ dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
if (!dict)
gf_log (this->name, GF_LOG_WARNING,
- "dict is NULL, need to make sure gfid's are same");
+ "dict is NULL, need to make sure gfids are same");
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == ENOENT || force) {
@@ -429,7 +515,7 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
loc,
st_mode_from_ia (local->stbuf.ia_prot,
local->stbuf.ia_type),
- dict);
+ 0, dict);
}
}
@@ -448,7 +534,7 @@ dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
uint32_t hashval = 0;
int ret = 0;
- ret = dht_hash_compute (layout->type, loc->path, &hashval);
+ ret = dht_hash_compute (this, layout->type, loc->path, &hashval);
if (ret == 0) {
start = (hashval % layout->cnt);
}
@@ -460,14 +546,52 @@ static inline int
dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
{
int i = 0;
+ int j = 0;
int err = 0;
int count = 0;
+ dht_conf_t *conf = NULL;
+
+ /* Gets in use only for replace-brick, remove-brick */
+ conf = this->private;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ layout->list[i].err = EINVAL;
+ break;
+ }
+ }
+ }
for (i = 0; i < layout->cnt; i++) {
err = layout->list[i].err;
- if (err == -1 || err == 0) {
- layout->list[i].err = -1;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+
+ It is important to note that it is safe
+ to race with mkdir() as self-heal and
+ mkdir are idempotent operations. Both will
+ strive to set the directory and layouts to
+ the same final state.
+ */
count++;
+ if (!err)
+ layout->list[i].err = -1;
}
}
@@ -482,49 +606,126 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
}
}
- count = ((layout->spread_cnt) ? layout->spread_cnt :
- ((count) ? count : 1));
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (down brick and decommissioned bricks are considered
+ * unavailable). Else return count (available up bricks) */
+ count = ((layout->spread_cnt &&
+ (layout->spread_cnt <= count)) ?
+ layout->spread_cnt : ((count) ? count : 1));
return count;
}
+void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
+
+void dht_layout_entry_swap (dht_layout_t *layout, int i, int j);
+void dht_layout_range_swap (dht_layout_t *layout, int i, int j);
+
+/*
+ * OV_ENTRY(x,y) addresses the cached overlap between new->list[x] and
+ * old->list[y]. It's a bit icky relying on the caller's local variables
+ * ("table" and "new") in a macro, but it makes the rest of the code a lot
+ * clearer. The row stride is new->cnt, so the macro is only valid while
+ * both layouts have the same number of entries.
+ */
+#define OV_ENTRY(x,y)      table[(x) * new->cnt + (y)]
+
+/*
+ * Re-arrange the ranges of the freshly computed layout @new so that each
+ * subvolume keeps as much as possible of the hash range it owned in @old.
+ *
+ * @old is sorted by volume name first (a side effect on the caller's
+ * layout). For every usable entry i of @new the best swap partner j > i is
+ * searched: the one whose exchange of ranges yields the largest gain in
+ * total overlap with @old. Entries of @new carrying a positive errno are
+ * never touched.
+ *
+ * @frame and @loc are not used by the computation itself.
+ */
+void
+dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
+                                      dht_layout_t *new, dht_layout_t *old)
+{
+        int       i               = 0;
+        int       j               = 0;
+        uint32_t  curr_overlap    = 0;
+        uint32_t  max_overlap     = 0;
+        int       max_overlap_idx = -1;
+        uint32_t  overlap         = 0;
+        uint32_t *table           = NULL;
+
+        dht_layout_sort_volname (old);
+        /* Now old->list[] and new->list[] are expected to line up on the
+           same xlators/subvolumes, i.e. old->list[i] and new->list[i]
+           refer to the same subvolume.
+        */
+
+        /* The overlap table below is indexed with entries of "new" in both
+           dimensions (OV_ENTRY(i,i), OV_ENTRY(j,i), ...). That is only
+           within bounds when both layouts have the same number of entries;
+           otherwise leave the new layout as it was computed.
+        */
+        if (new->cnt < 1 || new->cnt != old->cnt) {
+                gf_log (THIS->name, GF_LOG_DEBUG,
+                        "layout sizes differ (new=%d, old=%d), "
+                        "not maximizing overlap", new->cnt, old->cnt);
+                return;
+        }
+
+        /* Build a table of overlaps between new[i] and old[j].
+           alloca() does not report failure through a NULL return, so there
+           is nothing to check here. */
+        table = alloca (sizeof (overlap) * old->cnt * new->cnt);
+        memset (table, 0, sizeof (overlap) * old->cnt * new->cnt);
+        for (i = 0; i < new->cnt; ++i) {
+                for (j = 0; j < old->cnt; ++j) {
+                        OV_ENTRY (i, j) = dht_overlap_calc (old, j, new, i);
+                }
+        }
+
+        for (i = 0; i < new->cnt; i++) {
+                if (new->list[i].err > 0) {
+                        /* Subvol might be marked for decommission
+                           with EINVAL, or some other serious error
+                           marked with positive errno.
+                        */
+                        continue;
+                }
+
+                max_overlap = 0;
+                max_overlap_idx = i;
+                for (j = (i + 1); j < new->cnt; ++j) {
+                        if (new->list[j].err > 0) {
+                                /* Same as above: skip subvols carrying a
+                                   positive errno. */
+                                continue;
+                        }
+                        /* Calculate the overlap now. */
+                        curr_overlap = OV_ENTRY (i, i) + OV_ENTRY (j, j);
+                        /* Calculate the overlap after the proposed swap. */
+                        overlap = OV_ENTRY (i, j) + OV_ENTRY (j, i);
+                        /* Are we better than status quo? */
+                        if (overlap > curr_overlap) {
+                                overlap -= curr_overlap;
+                                /* Are we better than the previous choice? */
+                                if (overlap > max_overlap) {
+                                        max_overlap = overlap;
+                                        max_overlap_idx = j;
+                                }
+                        }
+                }
+
+                if (max_overlap_idx != i) {
+                        dht_layout_range_swap (new, i, max_overlap_idx);
+                        /* Need to swap the table rows too. */
+                        for (j = 0; j < old->cnt; ++j) {
+                                overlap = OV_ENTRY (i, j);
+                                OV_ENTRY (i, j) =
+                                        OV_ENTRY (max_overlap_idx, j);
+                                OV_ENTRY (max_overlap_idx, j) = overlap;
+                        }
+                }
+        }
+}
+
+
dht_layout_t *
dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
- uint32_t chunk = 0;
- uint32_t start = 0;
- uint32_t stop = 0;
- uint32_t overlap = 0;
- uint32_t max_overlap = 0;
- uint32_t chunk_begin = 0;
- int count = 0;
- int cnt = 0;
int i = 0;
- int j = 0;
- int k = 0;
- int loop_cnt = 0;
- int start_subvol = 0;
- int *fix_array = NULL;
xlator_t *this = NULL;
dht_layout_t *new_layout = NULL;
dht_conf_t *priv = NULL;
dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ int ret = 0;
this = frame->this;
priv = this->private;
local = frame->local;
- count = cnt = dht_get_layout_count (this, layout, 0);
-
- chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
-
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
-
- fix_array = GF_CALLOC (sizeof (int), layout->cnt, gf_common_mt_char);
- if (!fix_array) {
- /* No fix, use the existing layout itself */
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "leaving %s alone",
+ loc->path);
goto done;
}
@@ -532,98 +733,33 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
if (!new_layout)
goto done;
- for (i = 0; i < new_layout->cnt; i++) {
- /* TODO: fix this in layout_alloc() itself */
- new_layout->list[i].err = -ENOENT;
- if (i < layout->cnt)
- new_layout->list[i].xlator = layout->list[i].xlator;
- }
-
- /* Check if there are any overlap in layout, and give the proper fix */
- for (i = 0; i < layout->cnt; i++) {
- /* No need to fix if 'err' is not '-1' */
- if (layout->list[i].err != -1)
- continue;
-
- /* If already existing layout is having no range, skip it */
- start = layout->list[i].start;
- stop = layout->list[i].stop;
- if ((stop - start) == 0)
- continue;
-
- max_overlap = 0;
-
- /* 'j' is used as starting point of each chunk */
- for (j = 1; j <= count; j++) {
- /* if chunk is already used, don't use it again */
- for (k = 0; k < i; k++)
- if (j == fix_array[k])
- break;
- if (k < i)
- continue;
-
- overlap = dht_find_overlap (i, (j-1), start, stop, chunk);
- if (max_overlap < overlap) {
- max_overlap = overlap;
- fix_array[i] = j;
- }
- }
-
- /* If we have any overlap, then use that itself as new
- layout for the subvolume */
- if (fix_array[i]) {
- chunk_begin = chunk * (fix_array[i] - 1);
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin,
- chunk, cnt, loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (fix_array[i] == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
+ /* If a subvolume is down, do not re-write the layout. */
+ ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
+ &subvol_down, NULL, NULL);
- }
+ if (subvol_down || (ret == -1)) {
+ gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down"
+ ". Skipping fix layout.", subvol_down);
+ GF_FREE (new_layout);
+ return NULL;
}
- /* Now, look for layouts which are not having any overlaps
- and give it a fix */
- for (loop_cnt = 0, i = start_subvol; loop_cnt < new_layout->cnt;
- i++, loop_cnt++) {
- if (i == new_layout->cnt)
- i = 0;
-
- /* If 'fix_array[i]' is set, the layout is already fixed. */
- if (fix_array[i])
- continue;
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
- if (layout->list[i].err != -1) {
- new_layout->list[i].err = layout->list[i].err;
- continue;
- }
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
- for (k = 1; k <= count; k++) {
- for (j = 0; j < new_layout->cnt; j++) {
- if (k == fix_array[j])
- break;
- }
- /* Didn't find any of the list begining with 'k' */
- if (j == new_layout->cnt)
- break;
- }
+ /* First give it a layout as though it is a new directory. This
+ ensures that rotation kicks in */
+ dht_layout_sort_volname (new_layout);
+ dht_selfheal_layout_new_directory (frame, loc, new_layout);
- fix_array[i] = k;
- chunk_begin = (k - 1) * chunk;
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin, chunk, cnt,
- loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (k == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
- }
+ /* Now selectively re-assign ranges only when it helps */
+ dht_selfheal_layout_maximize_overlap (frame, loc, new_layout, layout);
done:
if (new_layout) {
@@ -637,7 +773,7 @@ done:
local->layout = new_layout;
}
- return new_layout;
+ return local->layout;
}
@@ -661,9 +797,11 @@ dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
+ /* clear out the range, as we are re-computing here */
+ DHT_RESET_LAYOUT_RANGE (layout);
for (i = start_subvol; i < layout->cnt; i++) {
err = layout->list[i].err;
- if (err == -1) {
+ if (err == -1 || err == ENOENT) {
DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
cnt, loc->path);
if (--cnt == 0) {
@@ -676,7 +814,7 @@ dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
for (i = 0; i < start_subvol; i++) {
err = layout->list[i].err;
- if (err == -1) {
+ if (err == -1 || err == ENOENT) {
DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
cnt, loc->path);
if (--cnt == 0) {
@@ -695,35 +833,17 @@ int
dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
dht_local_t *local = NULL;
- int missing = -1;
- int down = -1;
- int holes = -1;
+ uint32_t holes = 0;
int ret = -1;
int i = -1;
- int overlaps = -1;
+ uint32_t overlaps = 0;
- this = frame->this;
- conf = this->private;
local = frame->local;
- missing = local->selfheal.missing;
- down = local->selfheal.down;
holes = local->selfheal.hole_cnt;
overlaps = local->selfheal.overlaps_cnt;
- if ((missing + down) == conf->subvolume_cnt) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
-
- if (holes <= down) {
- /* the down subvol might fill up the holes */
- ret = 0;
- }
-
if (holes || overlaps) {
dht_selfheal_layout_new_directory (frame, loc, layout);
ret = 0;
@@ -775,6 +895,9 @@ dht_fix_directory_layout (call_frame_t *frame,
/* No layout sorting required here */
tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
return 0;
@@ -786,7 +909,6 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
- uint32_t holes = 0;
uint32_t down = 0;
uint32_t misc = 0;
int ret = 0;
@@ -798,11 +920,9 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
dht_layout_anomalies (this, loc, layout,
&local->selfheal.hole_cnt,
&local->selfheal.overlaps_cnt,
- &local->selfheal.missing,
- &local->selfheal.down,
- &local->selfheal.misc);
+ NULL, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
- holes = local->selfheal.hole_cnt;
down = local->selfheal.down;
misc = local->selfheal.misc;
@@ -810,14 +930,14 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
local->selfheal.layout = dht_layout_ref (this, layout);
if (down) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%d subvolumes down -- not fixing", down);
ret = 0;
goto sorry_no_fix;
}
if (misc) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%d subvolumes have unrecoverable errors", misc);
ret = 0;
goto sorry_no_fix;
@@ -827,7 +947,7 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
ret = dht_selfheal_dir_getafix (frame, loc, layout);
if (ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"not able to form layout for the directory");
goto sorry_no_fix;
}
@@ -860,3 +980,50 @@ dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
return ret;
}
+
+/*
+ * Synctask body: push the uid/gid held in local->stbuf to the directory
+ * local->loc on every subvolume except the one returned by
+ * dht_first_up_subvol().
+ *
+ * @data is the call frame. A failure on one subvolume is logged and does
+ * not stop the remaining subvolumes from being processed. Always returns 0.
+ */
+int
+dht_dir_attr_heal (void *data)
+{
+        call_frame_t *frame  = NULL;
+        dht_local_t  *local  = NULL;
+        dht_conf_t   *conf   = NULL;
+        xlator_t     *this   = NULL;
+        xlator_t     *target = NULL;
+        int           total  = 0;
+        int           idx    = 0;
+
+        GF_VALIDATE_OR_GOTO ("dht", data, out);
+
+        frame = data;
+        this  = frame->this;
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", local, out);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO ("dht", conf, out);
+
+        total = conf->subvolume_cnt;
+
+        while (idx < total) {
+                target = conf->subvolumes[idx];
+                idx++;
+
+                if (target == NULL)
+                        continue;
+                if (target == dht_first_up_subvol (this))
+                        continue;
+
+                if (syncop_setattr (target, &local->loc, &local->stbuf,
+                                    (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+                                    NULL, NULL) == 0)
+                        continue;
+
+                gf_log ("dht", GF_LOG_ERROR, "Failed to set uid/gid on"
+                        " %s on %s subvol (%s)", local->loc.path,
+                        target->name, strerror (errno));
+        }
+out:
+        return 0;
+}
+
+/*
+ * Completion callback paired with dht_dir_attr_heal(): tears down the
+ * frame the sync task ran on. The task's result (@ret) and @data are not
+ * examined. Always returns 0.
+ */
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        (void) ret;
+        (void) data;
+
+        DHT_STACK_DESTROY (sync_frame);
+
+        return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
new file mode 100644
index 000000000..70aac7710
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -0,0 +1,758 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "statedump.h"
+#include "dht-common.h"
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+struct volume_options options[];
+
+/*
+ * Write a statedump description of @layout, using @prefix to build every
+ * key.
+ *
+ * The counters (cnt, preset, gen) are always written; the type is written
+ * unless it is IA_INVAL. The per-entry details (err, start, stop and, when
+ * set, the xlator type/name) are emitted only when IA_ISDIR() holds for the
+ * layout type. Nothing is written if either argument is NULL.
+ */
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix)
+{
+        char key[GF_DUMP_MAX_BUF_LEN];
+        int  idx = 0;
+
+        if (!layout || !prefix)
+                return;
+
+        gf_proc_dump_build_key (key, prefix, "cnt");
+        gf_proc_dump_write (key, "%d", layout->cnt);
+
+        gf_proc_dump_build_key (key, prefix, "preset");
+        gf_proc_dump_write (key, "%d", layout->preset);
+
+        gf_proc_dump_build_key (key, prefix, "gen");
+        gf_proc_dump_write (key, "%d", layout->gen);
+
+        if (layout->type != IA_INVAL) {
+                gf_proc_dump_build_key (key, prefix, "inode type");
+                gf_proc_dump_write (key, "%d", layout->type);
+        }
+
+        if (!IA_ISDIR (layout->type))
+                return;
+
+        while (idx < layout->cnt) {
+                gf_proc_dump_build_key (key, prefix, "list[%d].err", idx);
+                gf_proc_dump_write (key, "%d", layout->list[idx].err);
+
+                gf_proc_dump_build_key (key, prefix, "list[%d].start", idx);
+                gf_proc_dump_write (key, "%u", layout->list[idx].start);
+
+                gf_proc_dump_build_key (key, prefix, "list[%d].stop", idx);
+                gf_proc_dump_write (key, "%u", layout->list[idx].stop);
+
+                if (layout->list[idx].xlator) {
+                        gf_proc_dump_build_key (key, prefix,
+                                                "list[%d].xlator.type", idx);
+                        gf_proc_dump_write (key, "%s",
+                                            layout->list[idx].xlator->type);
+
+                        gf_proc_dump_build_key (key, prefix,
+                                                "list[%d].xlator.name", idx);
+                        gf_proc_dump_write (key, "%s",
+                                            layout->list[idx].xlator->name);
+                }
+
+                idx++;
+        }
+}
+
+
+/*
+ * Statedump handler for the translator's private configuration.
+ *
+ * Dumps the subvolume table (with per-subvolume file/dir layouts and status
+ * when present), the tunables, the disk-usage statistics and the time of
+ * the last stat fetch. The dump is taken under conf->subvolume_lock, which
+ * is only tried, never waited for.
+ *
+ * Returns -1 if @this or its private data is missing, the TRY_LOCK() error
+ * if the lock could not be taken, and 0 once the dump has been written.
+ */
+int32_t
+dht_priv_dump (xlator_t *this)
+{
+        char        key_prefix[GF_DUMP_MAX_BUF_LEN];
+        char        key[GF_DUMP_MAX_BUF_LEN];
+        /* ctime_r() needs room for at least 26 bytes. */
+        char        timestr[32] = {0,};
+        int         i    = 0;
+        dht_conf_t *conf = NULL;
+        int         ret  = -1;
+
+        if (!this)
+                goto out;
+
+        conf = this->private;
+        if (!conf)
+                goto out;
+
+        ret = TRY_LOCK(&conf->subvolume_lock);
+        if (ret != 0) {
+                return ret;
+        }
+
+        gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+        gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
+                               this->name);
+        gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
+        for (i = 0; i < conf->subvolume_cnt; i++) {
+                /* snprintf keeps every key inside the buffer no matter
+                   how large the index gets. */
+                snprintf (key, sizeof (key), "subvolumes[%d]", i);
+                gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+                                   conf->subvolumes[i]->name);
+                if (conf->file_layouts && conf->file_layouts[i]){
+                        snprintf (key, sizeof (key), "file_layouts[%d]", i);
+                        dht_layout_dump(conf->file_layouts[i], key);
+                }
+                if (conf->dir_layouts && conf->dir_layouts[i]) {
+                        snprintf (key, sizeof (key), "dir_layouts[%d]", i);
+                        dht_layout_dump(conf->dir_layouts[i], key);
+                }
+                if (conf->subvolume_status) {
+                        snprintf (key, sizeof (key),
+                                  "subvolume_status[%d]", i);
+                        gf_proc_dump_write(key, "%d",
+                                           (int)conf->subvolume_status[i]);
+                }
+        }
+
+        gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+        gf_proc_dump_write("gen", "%d", conf->gen);
+        gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+        gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+        gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+        gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+        gf_proc_dump_write("unhashed_sticky_bit", "%d",
+                           conf->unhashed_sticky_bit);
+        if (conf->du_stats) {
+                gf_proc_dump_write("du_stats.avail_percent", "%lf",
+                                   conf->du_stats->avail_percent);
+                gf_proc_dump_write("du_stats.avail_space", "%lu",
+                                   conf->du_stats->avail_space);
+                gf_proc_dump_write("du_stats.avail_inodes", "%lf",
+                                   conf->du_stats->avail_inodes);
+                gf_proc_dump_write("du_stats.log", "%lu",
+                                   conf->du_stats->log);
+        }
+
+        if (conf->last_stat_fetch.tv_sec) {
+                /* ctime() formats into a buffer shared by the whole
+                   process; ctime_r() writes into our own buffer so that
+                   other threads cannot clobber the text while it is being
+                   dumped. The result still ends with a newline, exactly
+                   as with ctime(). */
+                if (ctime_r (&conf->last_stat_fetch.tv_sec, timestr))
+                        gf_proc_dump_write("last_stat_fetch", "%s",
+                                           timestr);
+        }
+
+        UNLOCK(&conf->subvolume_lock);
+
+out:
+        return ret;
+}
+
+/*
+ * Statedump handler for the layout stored in an inode's context.
+ *
+ * Returns -1 when @this or @inode is NULL; otherwise returns whatever
+ * dht_inode_ctx_layout_get() returned. The layout section is written only
+ * when that lookup succeeded and actually produced a layout.
+ */
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode)
+{
+        dht_layout_t *layout = NULL;
+        int           ret    = -1;
+
+        if (!this || !inode)
+                return ret;
+
+        ret = dht_inode_ctx_layout_get (inode, this, &layout);
+        if (ret == 0 && layout) {
+                gf_proc_dump_add_section("xlator.cluster.dht.%s.inode",
+                                         this->name);
+                dht_layout_dump (layout, "layout");
+        }
+
+        return ret;
+}
+
+/*
+ * Translator teardown: detach the private configuration from @this and
+ * release the per-subvolume file layouts, the subvolume table, the
+ * subvolume status array and finally the configuration itself.
+ *
+ * this->private is cleared before anything is freed. Nothing happens when
+ * there is no private configuration.
+ */
+void
+dht_fini (xlator_t *this)
+{
+        dht_conf_t *conf = NULL;
+        int         idx  = 0;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+        conf = this->private;
+        this->private = NULL;
+        if (!conf)
+                goto out;
+
+        if (conf->file_layouts) {
+                for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                        GF_FREE (conf->file_layouts[idx]);
+                }
+                GF_FREE (conf->file_layouts);
+        }
+
+        GF_FREE (conf->subvolumes);
+        GF_FREE (conf->subvolume_status);
+        GF_FREE (conf);
+out:
+        return;
+}
+
+/*
+ * Set up memory accounting for this translator, reserving
+ * gf_dht_mt_end + 1 accounting types.
+ *
+ * Returns the result of xlator_mem_acct_init() (logging an error when it is
+ * non-zero), or -1 when @this is NULL.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+        ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
+
+        if (ret != 0) {
+                /* Kept as one literal: when split across two adjacent
+                   literals the space between "init" and "failed" got
+                   lost. */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Memory accounting init failed");
+                return ret;
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Parse the comma separated subvolume names in @bricks and mark each of
+ * them as decommissioned in @conf.
+ *
+ * For every name that matches a subvolume, the matching slot of
+ * conf->decommissioned_bricks is set and conf->decommission_subvols_cnt is
+ * incremented. When every name has been matched,
+ * conf->decommission_in_progress is set and 0 is returned.
+ *
+ * Returns -1 when @conf or @bricks is NULL, when the working copy of
+ * @bricks cannot be allocated, or as soon as a name matches no subvolume
+ * (names processed before the bad one stay marked).
+ *
+ * NOTE(review): the counter is incremented even for a brick that is already
+ * marked, so parsing the same list twice counts it twice -- confirm whether
+ * callers rely on the exact value.
+ */
+int
+dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
+                                 const char *bricks)
+{
+        int   i         = 0;
+        int   ret       = -1;
+        char *tmpstr    = NULL;
+        char *dup_brick = NULL;
+        char *node      = NULL;
+
+        if (!conf || !bricks)
+                goto out;
+
+        /* strtok_r() modifies its input, so work on a private copy. */
+        dup_brick = gf_strdup (bricks);
+        if (!dup_brick) {
+                /* Without this check strtok_r() would be handed a NULL
+                   string together with a NULL save pointer. */
+                goto out;
+        }
+
+        node = strtok_r (dup_brick, ",", &tmpstr);
+        while (node) {
+                for (i = 0; i < conf->subvolume_cnt; i++) {
+                        if (!strcmp (conf->subvolumes[i]->name, node)) {
+                                conf->decommissioned_bricks[i] =
+                                        conf->subvolumes[i];
+                                conf->decommission_subvols_cnt++;
+                                gf_log (this->name, GF_LOG_INFO,
+                                        "decommissioning subvolume %s",
+                                        conf->subvolumes[i]->name);
+                                break;
+                        }
+                }
+                if (i == conf->subvolume_cnt) {
+                        /* Wrong node given. */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "decommissioned-bricks: no subvolume "
+                                "named %s", node);
+                        goto out;
+                }
+                node = strtok_r (NULL, ",", &tmpstr);
+        }
+
+        ret = 0;
+        conf->decommission_in_progress = 1;
+out:
+        GF_FREE (dup_brick);
+
+        return ret;
+}
+
+
+/*
+ * Clear every decommissioned-brick marker in @conf, decrementing
+ * conf->decommission_subvols_cnt once per marker that was set.
+ *
+ * Returns 0, or -1 when @conf is NULL. @this is not used.
+ */
+int
+dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
+{
+        int idx = 0;
+
+        if (!conf)
+                return -1;
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                if (!conf->decommissioned_bricks[idx])
+                        continue;
+
+                conf->decommissioned_bricks[idx] = NULL;
+                conf->decommission_subvols_cnt--;
+        }
+
+        return 0;
+}
+/*
+ * (Re)compile the regular expression configured under option @name.
+ *
+ * The pattern is read from @odict. When the option is absent, only
+ * "rsync-hash-regex" falls back to a built-in pattern; for any other name
+ * the function returns without touching @re.
+ *
+ * A previously compiled expression (*re_valid set) is released first. The
+ * special value "none" leaves the expression disabled. Otherwise the
+ * pattern is compiled as an extended regex and *re_valid is set on success;
+ * a compilation failure is logged as a warning and leaves *re_valid false.
+ */
+void
+dht_init_regex (xlator_t *this, dict_t *odict, char *name,
+                regex_t *re, gf_boolean_t *re_valid)
+{
+        char *pattern = NULL;
+
+        if (dict_get_str (odict, name, &pattern) != 0) {
+                if (strcmp (name, "rsync-hash-regex") != 0)
+                        return;
+                pattern = "^\\.(.+)\\.[^.]+$";
+        }
+
+        /* Drop whatever was compiled by an earlier call. */
+        if (*re_valid) {
+                regfree (re);
+                *re_valid = _gf_false;
+        }
+
+        if (strcmp (pattern, "none") == 0)
+                return;
+
+        if (regcomp (re, pattern, REG_EXTENDED) != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "compiling regex %s failed", pattern);
+                return;
+        }
+
+        gf_log (this->name, GF_LOG_INFO,
+                "using regex %s = %s", name, pattern);
+        *re_valid = _gf_true;
+}
+
+/*
+ * Apply a changed set of @options to a running translator.
+ *
+ * Handled here: lookup-unhashed, min-free-disk (and the derived disk unit),
+ * min-free-inodes, directory-layout-spread, readdir-optimize,
+ * rebalance-stats (only when a defrag context exists), the list of
+ * decommissioned bricks and the two hash regexes.
+ *
+ * Returns 0 on success and also when the translator has no private
+ * configuration yet; returns -1 when an argument is NULL or when an option
+ * fails to parse (processing stops at the first failure).
+ */
+int
+dht_reconfigure (xlator_t *this, dict_t *options)
+{
+        dht_conf_t   *conf    = NULL;
+        char         *value   = NULL;
+        gf_boolean_t  as_bool = _gf_false;
+        int           ret     = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", options, out);
+
+        conf = this->private;
+        if (!conf)
+                return 0;
+
+        if (dict_get_str (options, "lookup-unhashed", &value) == 0) {
+                /* Anything other than "auto" has to be a boolean. */
+                if (strcasecmp (value, "auto") == 0) {
+                        gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+                               " lookup-unhashed reconfigured auto ");
+                        conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+                } else if (gf_string2boolean (value, &as_bool) == 0) {
+                        gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+                               " lookup-unhashed reconfigured (%s)",
+                               value);
+                        conf->search_unhashed = as_bool;
+                } else {
+                        gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
+                               " lookup-unhashed should be boolean,"
+                               " not (%s), defaulting to (%d)",
+                               value, conf->search_unhashed);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
+                          percent_or_size, out);
+        /* The option is either a percentage or a size in bytes; values
+           below 100 are tagged as a percentage. */
+        conf->disk_unit = 0;
+        if (conf->min_free_disk < 100.0)
+                conf->disk_unit = 'p';
+
+        GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
+                          percent, out);
+
+        GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
+                          options, uint32, out);
+
+        GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
+                          bool, out);
+        if (conf->defrag) {
+                GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
+                                  options, bool, out);
+        }
+
+        /* A present option (re)marks the listed bricks; an absent one
+           clears every marker. */
+        if (dict_get_str (options, "decommissioned-bricks", &value) == 0)
+                ret = dht_parse_decommissioned_bricks (this, conf, value);
+        else
+                ret = dht_decommissioned_remove (this, conf);
+        if (ret == -1)
+                goto out;
+
+        dht_init_regex (this, options, "rsync-hash-regex",
+                        &conf->rsync_regex, &conf->rsync_regex_valid);
+        dht_init_regex (this, options, "extra-hash-regex",
+                        &conf->extra_regex, &conf->extra_regex_valid);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Parse the rebalance filter string @data and prepend one entry per
+ * pattern to defrag->defrag_pattern.
+ *
+ * @data is a comma separated list of "pattern[:size]" items,
+ * eg: *avi, *pdf:10MB, *:1TB
+ * An item that consists of a size only is stored with the pattern "*".
+ * @data is modified in place by the tokenizer.
+ *
+ * Returns 0 on success. Returns -1 on a NULL argument, an allocation
+ * failure, an unparsable size or a pattern that does not fit into the
+ * entry's pattern buffer; entries added before the failing item remain on
+ * the list.
+ */
+static int
+gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data)
+{
+        int                       ret          = -1;
+        char                     *tmp_str      = NULL;
+        char                     *tmp_str1     = NULL;
+        char                     *dup_str      = NULL;
+        char                     *num          = NULL;
+        char                     *pattern_str  = NULL;
+        char                     *pattern      = NULL;
+        size_t                    pattern_len  = 0;
+        gf_defrag_pattern_list_t *pattern_list = NULL;
+
+        if (!this || !defrag || !data)
+                goto out;
+
+        pattern_str = strtok_r (data, ",", &tmp_str);
+        while (pattern_str) {
+                dup_str = gf_strdup (pattern_str);
+                if (!dup_str) {
+                        /* strtok_r() must not be started on a NULL
+                           string. */
+                        goto out;
+                }
+                pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t),
+                                          1);
+                if (!pattern_list) {
+                        goto out;
+                }
+                pattern = strtok_r (dup_str, ":", &tmp_str1);
+                num = strtok_r (NULL, ":", &tmp_str1);
+                if (!pattern)
+                        goto out;
+                if (!num) {
+                        /* A lone token that parses as a size means
+                           "every file of at least that size". */
+                        if (gf_string2bytesize(pattern, &pattern_list->size)
+                            == 0) {
+                                pattern = "*";
+                        }
+                } else if (gf_string2bytesize (num, &pattern_list->size) != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number format \"%s\"", num);
+                        goto out;
+                }
+
+                /* Copy exactly the pattern that was selected above (it may
+                   be the "*" literal rather than dup_str) and never more
+                   than the destination can hold. The entry was
+                   zero-allocated, so the copy stays NUL terminated. */
+                pattern_len = strlen (pattern);
+                if (pattern_len >= sizeof (pattern_list->path_pattern)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "pattern \"%s\" is too long", pattern);
+                        goto out;
+                }
+                memcpy (pattern_list->path_pattern, pattern, pattern_len);
+
+                /* Push the new entry onto the head of the list. */
+                pattern_list->next = defrag->defrag_pattern;
+                defrag->defrag_pattern = pattern_list;
+                pattern_list = NULL;
+
+                GF_FREE (dup_str);
+                dup_str = NULL;
+
+                pattern_str = strtok_r (NULL, ",", &tmp_str);
+        }
+
+        ret = 0;
+out:
+        if (ret)
+                GF_FREE (pattern_list);
+        GF_FREE (dup_str);
+
+        return ret;
+}
+
+/*
+ * Translator initialisation.
+ *
+ * Allocates the private configuration, optionally sets up the rebalance
+ * (defrag) context when a non-zero "rebalance-cmd" option is present, reads
+ * the translator options, initialises subvolumes, decommissioned bricks,
+ * hash regexes, layouts, locks and the local memory pool, and finally
+ * publishes the configuration in this->private.
+ *
+ * Returns 0 on success and -1 on any failure; on the "err" path the pieces
+ * of the configuration listed there are released again.
+ */
+int
+dht_init (xlator_t *this)
+{
+        dht_conf_t *conf = NULL;
+        char *temp_str = NULL;
+        int ret = -1;
+        int i = 0;
+        gf_defrag_info_t *defrag = NULL;
+        int cmd = 0;
+        char *node_uuid = NULL;
+
+
+        GF_VALIDATE_OR_GOTO ("dht", this, err);
+
+        /* No children at all is fatal. */
+        if (!this->children) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Distribute needs more than one subvolume");
+                return -1;
+        }
+
+        /* A translator without parents is only worth a warning. */
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile");
+        }
+
+        conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
+        if (!conf) {
+                goto err;
+        }
+
+        /* The return value is not examined; cmd stays 0 unless the lookup
+           stores something in it. */
+        ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+
+        if (cmd) {
+                /* A rebalance command was given: build the defrag context
+                   and require a parsable node-uuid. */
+                defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
+                                    gf_defrag_info_mt);
+
+                GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+
+                LOCK_INIT (&defrag->lock);
+
+                defrag->is_exiting = 0;
+
+                conf->defrag = defrag;
+
+                ret = dict_get_str (this->options, "node-uuid", &node_uuid);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "node-uuid not "
+                                "specified");
+                        goto err;
+                }
+
+                if (uuid_parse (node_uuid, defrag->node_uuid)) {
+                        gf_log (this->name, GF_LOG_ERROR, "Cannot parse "
+                                "glusterd node uuid");
+                        goto err;
+                }
+
+                defrag->cmd = cmd;
+
+                defrag->stats = _gf_false;
+        }
+
+        /* lookup-unhashed defaults to ON; "auto" is the only non-boolean
+           value recognised here. The result of gf_string2boolean() is not
+           checked. */
+        conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+        if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
+                /* If option is not "auto", other options _should_ be boolean */
+                if (strcasecmp (temp_str, "auto"))
+                        gf_string2boolean (temp_str, &conf->search_unhashed);
+                else
+                        conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+        }
+
+        GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
+                        err);
+
+        GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
+
+        GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
+                        err);
+
+        GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
+                        err);
+
+        /* NOTE(review): this runs before dht_init_subvolumes() below and
+           conf was zero-allocated, so subvolume_cnt is presumably still 0
+           at this point -- confirm the intended default. */
+        conf->dir_spread_cnt = conf->subvolume_cnt;
+        GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
+                        uint32, err);
+
+        GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
+                        bool, err);
+
+        GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
+
+        /* Options that only make sense while rebalancing. */
+        if (defrag) {
+                GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
+                if (dict_get_str (this->options, "rebalance-filter", &temp_str)
+                    == 0) {
+                        if (gf_defrag_pattern_list_fill (this, defrag, temp_str)
+                            == -1) {
+                                gf_log (this->name, GF_LOG_ERROR, "Cannot parse"
+                                        " rebalance-filter (%s)", temp_str);
+                                goto err;
+                        }
+                }
+        }
+
+        /* option can be any one of percent or bytes */
+        conf->disk_unit = 0;
+        if (conf->min_free_disk < 100)
+                conf->disk_unit = 'p';
+
+        ret = dht_init_subvolumes (this, conf);
+        if (ret == -1) {
+                goto err;
+        }
+
+        if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
+                ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+                if (ret == -1)
+                        goto err;
+        }
+
+        dht_init_regex (this, this->options, "rsync-hash-regex",
+                        &conf->rsync_regex, &conf->rsync_regex_valid);
+        dht_init_regex (this, this->options, "extra-hash-regex",
+                        &conf->extra_regex, &conf->extra_regex_valid);
+
+        ret = dht_layouts_init (this, conf);
+        if (ret == -1) {
+                goto err;
+        }
+
+        LOCK_INIT (&conf->subvolume_lock);
+        LOCK_INIT (&conf->layout_lock);
+
+        conf->gen = 1;
+
+        this->local_pool = mem_pool_new (dht_local_t, 512);
+        if (!this->local_pool) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create local_t's memory pool");
+                goto err;
+        }
+
+        /* The linkto and wildcard xattr names are derived from the
+           configured base name. */
+        GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
+        gf_asprintf (&conf->link_xattr_name, "%s.linkto", conf->xattr_name);
+        gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name);
+        if (!conf->link_xattr_name || !conf->wild_xattr_name) {
+                goto err;
+        }
+
+        this->private = conf;
+
+        return 0;
+
+err:
+        /* NOTE(review): this->local_pool, when already created, is not
+           released on this path -- verify whether a caller cleans it up.
+           NOTE(review): conf->xattr_name is filled in by GF_OPTION_INIT;
+           confirm that it is owned by conf before it is freed here. */
+        if (conf) {
+                if (conf->file_layouts) {
+                        for (i = 0; i < conf->subvolume_cnt; i++) {
+                                GF_FREE (conf->file_layouts[i]);
+                        }
+                        GF_FREE (conf->file_layouts);
+                }
+
+                GF_FREE (conf->subvolumes);
+
+                GF_FREE (conf->subvolume_status);
+
+                GF_FREE (conf->du_stats);
+
+                GF_FREE (conf->defrag);
+
+                GF_FREE (conf->xattr_name);
+                GF_FREE (conf->link_xattr_name);
+                GF_FREE (conf->wild_xattr_name);
+
+                GF_FREE (conf);
+        }
+
+        return -1;
+}
+
+
+/*
+ * Option table of the translator. Every entry names the option key(s), its
+ * type and, where given, the accepted values, bounds, default and the
+ * description text. The table is terminated by an entry whose key is NULL.
+ */
+struct volume_options options[] = {
+        /* Accepts "auto" in addition to the usual boolean spellings. */
+        { .key  = {"lookup-unhashed"},
+          .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
+                    "on", "off"},
+          .type = GF_OPTION_TYPE_STR,
+          .default_value = "on",
+          .description = "This option if set to ON, does a lookup through "
+          "all the sub-volumes, in case a lookup didn't return any result "
+          "from the hash subvolume. If set to OFF, it does not do a lookup "
+          "on the remaining subvolumes."
+        },
+        /* Either a percentage or an absolute size. */
+        { .key  = {"min-free-disk"},
+          .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+          .default_value = "10%",
+          .description = "Percentage/Size of disk space, after which the "
+          "process starts balancing out the cluster, and logs will appear "
+          "in log files",
+        },
+        { .key  = {"min-free-inodes"},
+          .type = GF_OPTION_TYPE_PERCENT,
+          .default_value = "5%",
+          .description = "after system has only N% of inodes, warnings "
+          "starts to appear in log files",
+        },
+        { .key = {"unhashed-sticky-bit"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+        },
+        { .key = {"use-readdirp"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "on",
+          .description = "This option if set to ON, forces the use of "
+          "readdirp, and hence also displays the stats of the files."
+        },
+        { .key = {"assert-no-child-down"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+          .description = "This option if set to ON, in the event of "
+          "CHILD_DOWN, will call exit."
+        },
+        /* Integer with a lower bound of 1; no default is listed here. */
+        { .key  = {"directory-layout-spread"},
+          .type = GF_OPTION_TYPE_INT,
+          .min  = 1,
+          .validate = GF_OPT_VALIDATE_MIN,
+          .description = "Specifies the directory layout spread."
+        },
+        { .key  = {"decommissioned-bricks"},
+          .type = GF_OPTION_TYPE_ANY,
+          .description = "This option if set to ON, decommissions "
+          "the brick, so that no new data is allowed to be created "
+          "on that brick."
+        },
+        /* Rebalance related options. */
+        { .key  = {"rebalance-cmd"},
+          .type = GF_OPTION_TYPE_INT,
+        },
+        { .key = {"node-uuid"},
+          .type = GF_OPTION_TYPE_STR,
+        },
+        { .key = {"rebalance-stats"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+          .description = "This option if set to ON displays and logs the "
+          " time taken for migration of each file, during the rebalance "
+          "process. If set to OFF, the rebalance logs will only display the "
+          "time spent in each directory."
+        },
+        { .key = {"readdir-optimize"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+          .description = "This option if set to ON enables the optimization "
+          "that allows DHT to requests non-first subvolumes to filter out "
+          "directory entries."
+        },
+        { .key = {"rsync-hash-regex"},
+          .type = GF_OPTION_TYPE_STR,
+          /* Setting a default here doesn't work.  See dht_init_regex. */
+          .description = "Regular expression for stripping temporary-file "
+          "suffix and prefix used by rsync, to prevent relocation when the "
+          "file is renamed."
+        },
+        { .key = {"extra-hash-regex"},
+          .type = GF_OPTION_TYPE_STR,
+          /* Setting a default here doesn't work.  See dht_init_regex. */
+          .description = "Regular expression for stripping temporary-file "
+          "suffix and prefix used by an application, to prevent relocation when "
+          "the file is renamed."
+        },
+        { .key = {"rebalance-filter"},
+          .type = GF_OPTION_TYPE_STR,
+        },
+
+        /* Base name from which the linkto and wildcard xattr names are
+           derived in dht_init(). */
+        { .key = {"xattr-name"},
+          .type = GF_OPTION_TYPE_STR,
+          .default_value = "trusted.glusterfs.dht",
+          .description = "Base for extended attributes used by this "
+          "translator instance, to avoid conflicts with others above or "
+          "below it."
+        },
+
+        /* NUFA option */
+        { .key  = {"local-volume-name"},
+          .type = GF_OPTION_TYPE_XLATOR
+        },
+
+        /* switch option */
+        { .key  = {"pattern.switch.case"},
+          .type = GF_OPTION_TYPE_ANY
+        },
+
+        /* Terminating entry. */
+        { .key  = {NULL} },
+};
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index c281bb152..fc0ca2f77 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,544 +14,23 @@
#include "config.h"
#endif
-/* TODO: add NS locking */
-
#include "statedump.h"
-#include "dht-common.c"
-
-/* TODO:
- - use volumename in xattr instead of "dht"
- - use NS locks
- - handle all cases in self heal layout reconstruction
- - complete linkfile selfheal
-*/
-struct volume_options options[];
-
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix)
-{
-
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", layout, out);
- GF_VALIDATE_OR_GOTO ("dht", prefix, out);
-
- gf_proc_dump_build_key(key, prefix, "cnt");
- gf_proc_dump_write(key, "%d", layout->cnt);
- gf_proc_dump_build_key(key, prefix, "preset");
- gf_proc_dump_write(key, "%d", layout->preset);
- gf_proc_dump_build_key(key, prefix, "gen");
- gf_proc_dump_write(key, "%d", layout->gen);
- gf_proc_dump_build_key(key, prefix, "type");
- gf_proc_dump_write(key, "%d", layout->type);
-
- for (i = 0; i < layout->cnt; i++) {
- gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
- gf_proc_dump_write(key, "%d", layout->list[i].err);
- gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
- gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
- if (layout->list[i].xlator) {
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.type", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->type);
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.name", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->name);
- }
- }
-
-out:
- return;
-}
-
-
-int32_t
-dht_priv_dump (xlator_t *this)
-{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
-
- if (!conf)
- return -1;
-
- ret = TRY_LOCK(&conf->subvolume_lock);
-
- if (ret != 0) {
- gf_log("", GF_LOG_WARNING, "Unable to lock dht subvolume %s",
- this->name);
- return ret;
- }
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
- this->name);
- gf_proc_dump_build_key(key, key_prefix, "subvolume_cnt");
- gf_proc_dump_write(key,"%d", conf->subvolume_cnt);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- gf_proc_dump_build_key(key, key_prefix, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
- conf->subvolumes[i]->name);
- if (conf->file_layouts && conf->file_layouts[i]){
- gf_proc_dump_build_key(key, key_prefix,
- "file_layouts[%d]",i);
- dht_layout_dump(conf->file_layouts[i], key);
- }
- if (conf->dir_layouts && conf->dir_layouts[i]) {
- gf_proc_dump_build_key(key, key_prefix,
- "dir_layouts[%d]",i);
- dht_layout_dump(conf->dir_layouts[i], key);
- }
- if (conf->subvolume_status) {
- gf_proc_dump_build_key(key, key_prefix,
- "subvolume_status[%d]", i);
- gf_proc_dump_write(key, "%d",
- (int)conf->subvolume_status[i]);
- }
-
- }
-
- gf_proc_dump_build_key(key, key_prefix, "search_unhashed");
- gf_proc_dump_write(key, "%d", conf->search_unhashed);
- gf_proc_dump_build_key(key, key_prefix, "gen");
- gf_proc_dump_write(key, "%d", conf->gen);
- gf_proc_dump_build_key(key, key_prefix, "min_free_disk");
- gf_proc_dump_write(key, "%lu", conf->min_free_disk);
- gf_proc_dump_build_key(key, key_prefix, "disk_unit");
- gf_proc_dump_write(key, "%c", conf->disk_unit);
- gf_proc_dump_build_key(key, key_prefix, "refresh_interval");
- gf_proc_dump_write(key, "%d", conf->refresh_interval);
- gf_proc_dump_build_key(key, key_prefix, "unhashed_sticky_bit");
- gf_proc_dump_write(key, "%d", conf->unhashed_sticky_bit);
- if (conf ->du_stats) {
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.avail_percent");
- gf_proc_dump_write(key, "%lf", conf->du_stats->avail_percent);
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.avail_space");
- gf_proc_dump_write(key, "%lu", conf->du_stats->avail_space);
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.log");
- gf_proc_dump_write(key, "%lu", conf->du_stats->log);
- }
- gf_proc_dump_build_key(key, key_prefix, "last_stat_fetch");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_stat_fetch.tv_sec));
-
- UNLOCK(&conf->subvolume_lock);
-
-out:
- return ret;
-}
-
-int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- dht_layout_t *layout = NULL;
- uint64_t tmp_layout = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", inode, out);
-
- ret = inode_ctx_get (inode, this, &tmp_layout);
-
- if (ret != 0)
- return ret;
-
- layout = (dht_layout_t *)(long)tmp_layout;
-
- if (!layout)
- return -1;
-
- gf_proc_dump_build_key(key_prefix, "xlator.cluster.dht",
- "%s.inode.%ld", this->name, inode->ino);
- dht_layout_dump(layout, key_prefix);
-
-out:
- return ret;
-}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- ret = dht_notify (this, event, data);
-
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
- this->private = NULL;
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
- }
-out:
- return;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-int
-validate_options (xlator_t *this, char **op_errstr)
-{
- int ret = 0;
- volume_opt_list_t *vol_opt = NULL;
- volume_opt_list_t *tmp;
-
- if (!this) {
- gf_log (this->name, GF_LOG_DEBUG, "'this' not a valid ptr");
- ret =-1;
- goto out;
- }
-
- if (list_empty (&this->volume_options))
- goto out;
-
- vol_opt = list_entry (this->volume_options.next,
- volume_opt_list_t, list);
- list_for_each_entry_safe (vol_opt, tmp, &this->volume_options, list) {
- ret = validate_xlator_volume_options_attacherr (this,
- vol_opt->given_opt,
- op_errstr);
- }
-
-out:
-
- return ret;
-}
-
-int
-reconfigure (xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- gf_boolean_t search_unhashed;
- uint32_t temp_free_disk = 0;
- int ret = -1;
- uint32_t dir_spread = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", options, out);
-
- conf = this->private;
- if (!conf)
- return 0;
-
- if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean*/
- if (strcasecmp (temp_str, "auto")) {
- if (!gf_string2boolean (temp_str, &search_unhashed)) {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unahashed reconfigured (%s)",
- temp_str);
- conf->search_unhashed = search_unhashed;
- } else {
- gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
- " lookup-unahashed should be boolean,"
- " not (%s), defaulting to (%d)",
- temp_str, conf->search_unhashed);
- //return -1;
- ret = -1;
- goto out;
- }
- } else {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unahashed reconfigured auto ");
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
- }
-
- if (dict_get_str (options, "min-free-disk", &temp_str) == 0) {
- if (gf_string2percent (temp_str, &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- }
- } else {
- gf_string2bytesize (temp_str, &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
-
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " min-free-disk reconfigured to %s",
- temp_str);
- }
-
- if (dict_get_str (options, "directory-layout-spread", &temp_str) == 0) {
- ret = gf_string2uint32 (temp_str, &dir_spread);
- if (ret ||
- (dir_spread > conf->subvolume_cnt) ||
- (dir_spread < 1)) {
- gf_log (this->name, GF_LOG_ERROR,
- "wrong 'directory-layout-spread' option given "
- "(%s). setting to earlier value (%d)",
- temp_str, conf->dir_spread_cnt);
- ret = -1;
- goto out;
- }
- conf->dir_spread_cnt = dir_spread;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
- char *def_val = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", this, err);
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- conf->unhashed_sticky_bit = 0;
-
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->use_readdirp = 1;
-
- if (dict_get_str (this->options, "use-readdirp",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->use_readdirp);
- }
-
- if (xlator_get_volopt_info (&this->volume_options, "min-free-disk",
- &def_val, NULL)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- " min-free-disk not found");
- ret = -1;
- goto err;
- } else {
- if (gf_string2percent (def_val, &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str, &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
-
- if (dict_get_str (this->options, "min-free-disk", &temp_str) == 0) {
- if (gf_string2percent (temp_str, &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- }
- } else {
- gf_string2bytesize (temp_str, &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
-
- conf->dir_spread_cnt = conf->subvolume_cnt;
- if (dict_get_str (this->options, "directory-layout-spread",
- &temp_str) == 0) {
- ret = gf_string2uint32 (temp_str, &conf->dir_spread_cnt);
- if (ret ||
- (conf->dir_spread_cnt > conf->subvolume_cnt) ||
- (conf->dir_spread_cnt < 1)) {
- gf_log (this->name, GF_LOG_WARNING,
- "wrong 'directory-layout-spread' option given "
- "(%s). setting it to subvolume count",
- temp_str);
- conf->dir_spread_cnt = conf->subvolume_cnt;
- }
- }
-
- conf->assert_no_child_down = 0;
-
- ret = dict_get_str_boolean (this->options, "assert-no-child-down", 0);
- if (ret != -1) {
- if (conf->assert_no_child_down != ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Changing assert-no-child-down from %d to %d",
- conf->assert_no_child_down, ret);
- }
- conf->assert_no_child_down = ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "'assert-no-child-down' takes only boolean arguments");
- }
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
- }
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
- return -1;
-}
+#include "dht-common.h"
+class_methods_t class_methods = { /* hook table: binds init/fini/reconfigure/notify to the dht_* functions */
+ .init = dht_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
.lookup = dht_lookup,
.mknod = dht_mknod,
.create = dht_create,
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
.open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
.statfs = dht_statfs,
- .lk = dht_lk,
.opendir = dht_opendir,
.readdir = dht_readdir,
.readdirp = dht_readdirp,
@@ -571,14 +41,38 @@ struct xlator_fops fops = {
.mkdir = dht_mkdir,
.rmdir = dht_rmdir,
.rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
.entrylk = dht_entrylk,
.fentrylk = dht_fentrylk,
+
+ /* Inode read operations */
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
+ .readv = dht_readv,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .lk = dht_lk,
+
+ /* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
+ .removexattr = dht_removexattr,
+ .setxattr = dht_setxattr,
+ .fsetxattr = dht_fsetxattr,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .writev = dht_writev,
.xattrop = dht_xattrop,
.fxattrop = dht_fxattrop,
.setattr = dht_setattr,
.fsetattr = dht_fsetattr,
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
struct xlator_dumpops dumpops = {
@@ -592,31 +86,4 @@ struct xlator_cbks cbks = {
// .releasedir = dht_releasedir,
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- .default_value = "10%",
- .description = "Percentage/Size of disk space that must be "
- "kept free."
- },
- { .key = {"unhashed-sticky-bit"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"use-readdirp"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"assert-no-child-down"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"directory-layout-spread"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {NULL} },
-};
+;
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 845c6b74e..e934acdf0 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,10 +14,12 @@
#include "config.h"
#endif
-#include "dht-common.c"
+#include "dht-common.h"
/* TODO: all 'TODO's in dht.c holds good */
+extern struct volume_options options[];
+
int
nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
@@ -44,7 +37,6 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int call_cnt = 0;
int ret = 0;
-
conf = this->private;
prev = cookie;
@@ -62,7 +54,8 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
@@ -141,7 +134,7 @@ out:
err:
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, NULL);
+ inode, stbuf, xattr, postparent);
return 0;
}
@@ -150,7 +143,6 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *xattr_req)
{
xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -169,21 +161,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
- goto err;
- }
-
if (xattr_req) {
local->xattr_req = dict_ref (xattr_req);
} else {
@@ -191,14 +174,11 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
}
hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
- local->cached_subvol = cached_subvol;
local->hashed_subvol = hashed_subvol;
if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
-
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -215,7 +195,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
goto do_fresh_lookup;
}
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
local->call_cnt = layout->cnt;
call_cnt = local->call_cnt;
@@ -224,7 +204,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
* revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -245,7 +225,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
} else {
do_fresh_lookup:
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -254,7 +234,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
}
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ conf->link_xattr_name, 256);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -273,7 +253,8 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -282,7 +263,7 @@ nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
@@ -293,28 +274,27 @@ nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd,
- local->params);
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
return 0;
err:
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
nufa_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
int op_errno = -1;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -324,7 +304,7 @@ nufa_create (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -343,26 +323,19 @@ nufa_create (call_frame_t *frame, xlator_t *this,
if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
avail_subvol =
dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
+ (xlator_t *)conf->private,
+ local);
}
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
local->params = dict_ref (params);
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- nufa_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
@@ -371,14 +344,14 @@ nufa_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -387,41 +360,45 @@ int
nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
local->cached_subvol->fops->mknod,
&local->loc, local->mode, local->rdev,
- local->params);
+ local->umask, local->params);
return 0;
}
-
+err:
WIPE (postparent);
WIPE (preparent);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent, xdata);
return 0;
}
int
nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
int op_errno = -1;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -431,7 +408,7 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -451,23 +428,20 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
avail_subvol =
dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
+ (xlator_t *)conf->private,
+ local);
}
if (avail_subvol != subvol) {
/* Create linkfile first */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
local->params = dict_ref (params);
local->mode = mode;
+ local->umask = umask;
local->rdev = rdev;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
+ dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, this,
avail_subvol, subvol, loc);
return 0;
}
@@ -475,211 +449,185 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
+gf_boolean_t /* _gf_true when the leading segments of str1 and str2 are identical, else _gf_false */
+same_first_part (char *str1, char term1, char *str2, char term2) /* a segment ends at '\0' or at its own terminator (term1 / term2) */
{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
+ gf_boolean_t ended1; /* str1 currently points at '\0' or term1 */
+ gf_boolean_t ended2; /* str2 currently points at '\0' or term2 */
+
+ for (;;) { /* exits only through one of the two returns below */
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) { /* both segments ended at the same offset with no mismatch so far */
+ return _gf_true;
+ }
+ if (ended1 || ended2 || (*str1 != *str2)) { /* one segment is shorter, or the current characters differ */
+ return _gf_false;
+ }
+ ++str1;
+ ++str2;
+ }
}
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
+typedef struct nufa_args { /* bundle passed to nufa_find_local_brick through its void *data parameter */
+ xlator_t *this; /* translator whose private conf receives the selected subvolume */
+ char *volname; /* name compared against xl->name; also passed to gf_is_same_address when addr_match is set */
+ gf_boolean_t addr_match; /* when true, bricks may also be matched through their "remote-host" option */
+} nufa_args_t;
- conf = this->private;
+static void /* per-xlator visitor: stores the first matching xlator in conf->private */
+nufa_find_local_brick (xlator_t *xl, void *data) /* data must point to a nufa_args_t */
+{
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL; /* value of xl's "remote-host" option, if present */
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /* A local subvolume was already recorded in conf->private; keep the
+ * first match and ignore every later xlator. */
+ if (conf->private)
+ return;
+
+ if (strcmp (xl->name, local_volname) == 0) { /* exact name match is accepted regardless of addr_match */
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using specified subvol %s",
+ local_volname);
+ return;
+ }
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
+ if (!addr_match) /* address-based matching not requested: nothing more to try */
+ return;
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
+ ret = dict_get_str (xl->options, "remote-host", &brick_host);
+ if ((ret == 0) && /* only xlators that carry a "remote-host" option are considered here */
+ (gf_is_same_address (local_volname, brick_host) ||
+ gf_is_local_addr (brick_host))) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using the first local "
+ "subvol %s", xl->name);
+ return;
+ }
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
+}
- GF_FREE (conf);
- }
+static void /* repoint the lookup/create/mknod fops of this xlator at the dht_* handlers */
+nufa_to_dht (xlator_t *this)
+{
+ GF_ASSERT (this);
+ GF_ASSERT (this->fops);
- return;
+ this->fops->lookup = dht_lookup; /* only these three fops are overwritten; the rest of the table is left as is */
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
}
int
-init (xlator_t *this)
+nufa_find_local_subvol (xlator_t *this, /* returns 0 once conf->private holds the node directly below a "cluster/nufa" parent, -1 otherwise */
+ void (*fn) (xlator_t *each, void* data), void *data) /* fn is run on the graph with data; it is expected to set conf->private */
{
- dht_conf_t *conf = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- char *local_volname = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- char my_hostname[256];
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "NUFA needs more than one subvolume");
+ int ret = -1; /* stays -1 unless a "cluster/nufa" ancestor is reached below */
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL; /* parents list of the node currently being examined */
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL; /* node that becomes conf->private if its parent is cluster/nufa */
+
+ xlator_foreach_depth_first (this, fn, data);
+ if (!conf->private) { /* the visitor did not record any matching xlator */
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't find a local "
+ "brick");
return -1;
}
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf),
- gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
+ candidate = conf->private; /* start from the xlator the visitor selected */
+ trav = candidate->parents;
+ while (trav) { /* climb upward; only the first entry of each parents list is followed */
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
+ parent = trav->xlator;
+ if (strcmp (parent->type, "cluster/nufa") == 0) { /* candidate sits directly under a nufa xlator */
+ gf_log (this->name, GF_LOG_INFO, "Found local subvol, "
+ "%s", candidate->name);
+ ret = 0;
+ conf->private = candidate; /* replace the originally matched xlator with this ancestor */
+ break;
+ }
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
+ candidate = parent; /* move one level up and retry */
+ trav = parent->parents;
}
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
+ return ret; /* on -1 after the climb, conf->private still holds the xlator chosen by fn */
+}
- conf->gen = 1;
+int /* returns dht_init's non-zero result on failure, otherwise always 0 */
+nufa_init (xlator_t *this) /* runs dht_init, then tries to select the local subvolume */
+{
+ data_t *data = NULL; /* raw value of the "local-volume-name" option, if set */
+ char *local_volname = NULL; /* name (or host name) used to identify the local subvolume */
+ int ret = -1;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false; /* set only when no explicit volume name was configured */
+ nufa_args_t args = {0, };
- local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
+ ret = dht_init(this);
+ if (ret) { /* propagate the dht_init failure unchanged */
+ return ret;
}
- if (ret == 0)
- local_volname = my_hostname;
-
- data = dict_get (this->options, "local-volume-name");
- if (data) {
+ if ((data = dict_get (this->options, "local-volume-name"))) { /* explicit option: match by xlator name only */
local_volname = data->data;
- }
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, local_volname) == 0)
- break;
- trav = trav->next;
- }
+ } else { /* no option given: fall back to host-name / address matching */
+ addr_match = _gf_true;
+ local_volname = "localhost"; /* used if gethostname fails */
+ ret = gethostname (my_hostname, 256);
+ if (ret == 0)
+ local_volname = my_hostname;
- if (!trav) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not find subvolume named '%s'. "
- "Please define volume with the name as the hostname "
- "or override it with 'option local-volume-name'",
- local_volname);
- goto err;
- }
- /* The volume specified exists */
- conf->private = trav->xlator;
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
+ else /* gethostname failed: only warn and keep "localhost" */
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hostname (%s)",
+ strerror (errno));
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
}
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol (this, nufa_find_local_brick, &args);
+ if (ret) { /* not fatal: the xlator keeps running with the dht handlers for lookup/create/mknod */
+ gf_log (this->name, GF_LOG_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht (this);
}
-
- this->private = conf;
-
return 0;
+}
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
- return -1;
-}
+class_methods_t class_methods = { /* hook table: nufa supplies its own init and uses the dht_* functions for the rest */
+ .init = nufa_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
@@ -726,19 +674,3 @@ struct xlator_fops fops = {
struct xlator_cbks cbks = {
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index e6a2e5d5c..d3ea90ba8 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,7 +14,7 @@
#include "config.h"
#endif
-#include "dht-common.c"
+#include "dht-common.h"
#include "dht-mem-types.h"
#include <sys/time.h>
@@ -31,6 +22,8 @@
#include <fnmatch.h>
#include <string.h>
+extern struct volume_options options[];
+
struct switch_sched_array {
xlator_t *xl;
int32_t eligible;
@@ -76,29 +69,37 @@ get_switch_matching_subvol (const char *path, dht_conf_t *conf,
struct switch_struct *cond = NULL;
struct switch_struct *trav = NULL;
char *pathname = NULL;
- int idx = 0;
+ int idx = 0;
+ xlator_t *subvol = NULL;
cond = conf->private;
+ subvol = hashed_subvol;
if (!cond)
- return hashed_subvol;
+ goto out;
- trav = cond;
pathname = gf_strdup (path);
+ if (!pathname)
+ goto out;
+
+ trav = cond;
while (trav) {
if (fnmatch (trav->path_pattern,
pathname, FNM_NOESCAPE) == 0) {
for (idx = 0; idx < trav->num_child; idx++) {
if (trav->array[idx].xl == hashed_subvol)
- return hashed_subvol;
+ goto out;
}
idx = trav->node_index++;
trav->node_index %= trav->num_child;
- return trav->array[idx].xl;
+ subvol = trav->array[idx].xl;
+ goto out;
}
trav = trav->next;
}
+out:
GF_FREE (pathname);
- return hashed_subvol;
+
+ return subvol;
}
@@ -136,7 +137,8 @@ switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
@@ -246,21 +248,12 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
- goto err;
- }
-
if (xattr_req) {
local->xattr_req = dict_ref (xattr_req);
} else {
@@ -268,14 +261,12 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
}
hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+ cached_subvol = local->cached_subvol;
- local->cached_subvol = cached_subvol;
local->hashed_subvol = hashed_subvol;
if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
-
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -301,11 +292,11 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
* attribute, revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht");
+ "failed to set dict value for %s",
+ conf->xattr_name);
for (i = 0; i < layout->cnt; i++) {
subvol = layout->list[i].xlator;
@@ -320,18 +311,18 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
} else {
do_fresh_lookup:
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht");
+ "failed to set dict value for %s",
+ conf->xattr_name);
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ conf->link_xattr_name, 256);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht.linkto");
+ "failed to set dict value for %s",
+ conf->link_xattr_name);
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -377,7 +368,8 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
return 0;
}
@@ -386,7 +378,7 @@ switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
@@ -397,28 +389,27 @@ switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd,
- local->params);
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
return 0;
err:
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
switch_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
int op_errno = -1;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -428,7 +419,7 @@ switch_create (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -446,25 +437,18 @@ switch_create (call_frame_t *frame, xlator_t *this,
avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
if (dht_is_subvol_filled (this, avail_subvol)) {
avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
}
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- switch_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, switch_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
@@ -473,14 +457,14 @@ switch_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -489,38 +473,42 @@ int
switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
local->cached_subvol->fops->mknod,
&local->loc, local->mode, local->rdev,
- local->params);
+ local->umask, local->params);
return 0;
}
-
+err:
DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent, xdata);
return 0;
}
int
-switch_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
int op_errno = -1;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -530,7 +518,7 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -549,59 +537,45 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
if (dht_is_subvol_filled (this, avail_subvol)) {
avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
}
if (avail_subvol != subvol) {
/* Create linkfile first */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
local->params = dict_ref (params);
local->mode = mode;
+ local->umask = umask;
local->rdev = rdev;
local->cached_subvol = avail_subvol;
dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
- avail_subvol, subvol, loc);
+ this, avail_subvol, subvol, loc);
return 0;
}
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
void
-fini (xlator_t *this)
+switch_fini (xlator_t *this)
{
- int i = 0;
dht_conf_t *conf = NULL;
struct switch_struct *trav = NULL;
struct switch_struct *prev = NULL;
@@ -612,30 +586,14 @@ fini (xlator_t *this)
trav = (struct switch_struct *)conf->private;
conf->private = NULL;
while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
+ GF_FREE (trav->array);
prev = trav;
trav = trav->next;
GF_FREE (prev);
}
-
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
}
- return;
+ dht_fini(this);
}
int
@@ -695,8 +653,10 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
dup_str = gf_strdup (switch_str);
switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
gf_switch_mt_switch_struct);
- if (!switch_opt)
+ if (!switch_opt) {
+ GF_FREE (dup_str);
goto err;
+ }
pattern = strtok_r (dup_str, ":", &tmp_str1);
childs = strtok_r (NULL, ":", &tmp_str1);
@@ -706,6 +666,7 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
"for all the unconfigured child nodes,"
" hence neglecting current option");
switch_str = strtok_r (NULL, ";", &tmp_str);
+ GF_FREE (switch_opt);
GF_FREE (dup_str);
continue;
}
@@ -778,6 +739,7 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
/* First entry */
switch_buf = switch_opt;
}
+ switch_opt = NULL;
switch_str = strtok_r (NULL, ";", &tmp_str);
}
@@ -834,19 +796,20 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
/* First entry */
switch_buf = switch_opt;
}
+ switch_opt = NULL;
}
/* */
conf->private = switch_buf;
return 0;
err:
+ GF_FREE (switch_buf_array);
+ GF_FREE (switch_opt);
+
if (switch_buf) {
- if (switch_buf_array)
- GF_FREE (switch_buf_array);
trav = switch_buf;
while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
+ GF_FREE (trav->array);
switch_opt = trav;
trav = trav->next;
GF_FREE (switch_opt);
@@ -856,68 +819,18 @@ err:
}
-int
-init (xlator_t *this)
+int32_t
+switch_init (xlator_t *this)
{
dht_conf_t *conf = NULL;
data_t *data = NULL;
- char *temp_str = NULL;
int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "SWITCH needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_switch_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
- conf->unhashed_sticky_bit = 0;
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
}
+ conf = this->private;
data = dict_get (this->options, "pattern.switch.case");
if (data) {
@@ -928,65 +841,23 @@ init (xlator_t *this)
}
}
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_switch_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
- }
-
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
- }
-
this->private = conf;
-
return 0;
err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
+ dht_fini(this);
return -1;
}
+class_methods_t class_methods = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
+
+
struct xlator_fops fops = {
.lookup = switch_lookup,
.create = switch_create,
@@ -1031,19 +902,3 @@ struct xlator_fops fops = {
struct xlator_cbks cbks = {
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"pattern.switch.case"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
index 5f78a2965..5c1364b7f 100644
--- a/xlators/cluster/ha/src/Makefile.am
+++ b/xlators/cluster/ha/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = ha.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-ha_la_LDFLAGS = -module -avoidversion
+ha_la_LDFLAGS = -module -avoid-version
ha_la_SOURCES = ha-helpers.c ha.c
ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = ha.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
index 1e4af1b62..19be1ed27 100644
--- a/xlators/cluster/ha/src/ha-helpers.c
+++ b/xlators/cluster/ha/src/ha-helpers.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "call-stub.h"
#include "defaults.h"
diff --git a/xlators/cluster/ha/src/ha-mem-types.h b/xlators/cluster/ha/src/ha-mem-types.h
index 9bfb3972b..e5e97d237 100644
--- a/xlators/cluster/ha/src/ha-mem-types.h
+++ b/xlators/cluster/ha/src/ha-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __HA_MEM_TYPES_H__
#define __HA_MEM_TYPES_H__
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
index 38d4229d3..3eccb516b 100644
--- a/xlators/cluster/ha/src/ha.c
+++ b/xlators/cluster/ha/src/ha.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* generate errors randomly, code is simple now, better alogorithm
* can be written to decide what error to be returned and when
*/
@@ -1876,13 +1866,9 @@ err:
}
if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- }
+ GF_FREE (hafdp->fdstate);
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- }
+ GF_FREE (hafdp->path);
GF_FREE (hafdp);
}
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
index 39b6851e7..e2ed7eaa6 100644
--- a/xlators/cluster/ha/src/ha.h
+++ b/xlators/cluster/ha/src/ha.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __HA_H_
#define __HA_H_
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
index 26e19137a..a278b05e2 100644
--- a/xlators/cluster/map/src/Makefile.am
+++ b/xlators/cluster/map/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = map.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-map_la_LDFLAGS = -module -avoidversion
+map_la_LDFLAGS = -module -avoid-version
map_la_SOURCES = map.c map-helper.c
map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = map.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
index 81212fcfd..851397b68 100644
--- a/xlators/cluster/map/src/map-helper.c
+++ b/xlators/cluster/map/src/map-helper.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2009-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/cluster/map/src/map-mem-types.h b/xlators/cluster/map/src/map-mem-types.h
index 669b93dc2..3e89f4736 100644
--- a/xlators/cluster/map/src/map-mem-types.h
+++ b/xlators/cluster/map/src/map-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_MEM_TYPES_H__
#define __MAP_MEM_TYPES_H__
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
index ead9da0b9..6150a33ce 100644
--- a/xlators/cluster/map/src/map.c
+++ b/xlators/cluster/map/src/map.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -2375,8 +2365,7 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
- if (priv->xlarray)
- GF_FREE (priv->xlarray);
+ GF_FREE (priv->xlarray);
trav_map = priv->map;
while (trav_map) {
diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h
index bccac437c..7703a543e 100644
--- a/xlators/cluster/map/src/map.h
+++ b/xlators/cluster/map/src/map.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_H__
#define __MAP_H__
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
index 0db3c9eeb..2d151422a 100644
--- a/xlators/cluster/stripe/src/Makefile.am
+++ b/xlators/cluster/stripe/src/Makefile.am
@@ -2,16 +2,19 @@
xlator_LTLIBRARIES = stripe.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-stripe_la_LDFLAGS = -module -avoidversion
+stripe_la_LDFLAGS = -module -avoid-version
+
+stripe_la_SOURCES = stripe.c stripe-helpers.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-stripe_la_SOURCES = stripe.c $(top_builddir)/xlators/lib/src/libxlator.c
stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = stripe.h stripe-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
CLEANFILES =
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
new file mode 100644
index 000000000..a83abdc72
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe-helpers.c
@@ -0,0 +1,675 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+
+#include "stripe.h"
+#include "byte-order.h"
+#include "mem-types.h"
+
+/*
+ * Drop every reference held by a stripe_local_t: both loc structures are
+ * wiped, and the fd, inode, xattr and xdata references are released when
+ * they are set. A NULL local is a no-op. The structure itself is not freed.
+ */
+void
+stripe_local_wipe (stripe_local_t *local)
+{
+        if (local == NULL)
+                return;
+
+        loc_wipe (&local->loc);
+        loc_wipe (&local->loc2);
+
+        if (local->fd != NULL)
+                fd_unref (local->fd);
+
+        if (local->inode != NULL)
+                inode_unref (local->inode);
+
+        if (local->xattr != NULL)
+                dict_unref (local->xattr);
+
+        if (local->xdata != NULL)
+                dict_unref (local->xdata);
+}
+
+
+
+/*
+ * dict_foreach() callback used by stripe_aggregate_xattr(): folds one
+ * key/value pair into the destination dict passed through @data.
+ *
+ *  - GF_XATTR_QUOTA_SIZE_KEY: the 64-bit value is added to the one already
+ *    stored in the destination (a zeroed slot is created first when the key
+ *    is missing). Both operands are converted with ntoh64() and the sum is
+ *    stored back with hton64().
+ *  - GF_CONTENT_KEY: ignored, content is never aggregated.
+ *  - anything else: copied into the destination with dict_set().
+ *
+ * Failures are logged only; the function always returns 0.
+ */
+int
+stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+        dict_t  *target = data;
+        int64_t *total  = NULL;
+        int64_t *delta  = NULL;
+
+        if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) != 0) {
+                /* No need to aggregate 'CONTENT' data */
+                if (strcmp (key, GF_CONTENT_KEY) != 0) {
+                        if (dict_set (target, key, value))
+                                gf_log ("stripe", GF_LOG_WARNING,
+                                        "xattr dict set failed");
+                }
+                return 0;
+        }
+
+        if (dict_get_bin (target, key, (void **)&total) < 0) {
+                /* first contribution for this key: start from zero */
+                total = GF_CALLOC (1, sizeof (int64_t), gf_common_mt_char);
+                if (total == NULL) {
+                        gf_log ("stripe", GF_LOG_WARNING,
+                                "memory allocation failed");
+                        return 0;
+                }
+
+                if (dict_set_bin (target, key, total, sizeof (int64_t)) < 0) {
+                        gf_log ("stripe", GF_LOG_WARNING,
+                                "stripe aggregate dict set failed");
+                        GF_FREE (total);
+                        return 0;
+                }
+        }
+
+        delta = data_to_bin (value);
+        if (delta == NULL) {
+                gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
+                return 0;
+        }
+
+        *total = hton64 (ntoh64 (*total) + ntoh64 (*delta));
+
+        return 0;
+}
+
+
+/*
+ * Fold every key/value pair of @src into @dst by running stripe_aggregate()
+ * over @src. Does nothing when either dictionary is NULL.
+ */
+void
+stripe_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+        if ((dst != NULL) && (src != NULL))
+                dict_foreach (src, stripe_aggregate, dst);
+}
+
+
+/*
+ * Concatenate the xattr values held in local->xattr_list into @buffer,
+ * separated by single spaces, and NUL-terminate the result.
+ *
+ * Entries with a zero length or a NULL value are skipped. The separator
+ * written after the last copied entry is replaced by the terminator, and
+ * *total (when @total is non-NULL) receives the resulting string length.
+ *
+ * NOTE: when no entry is copied at all, the terminator lands one byte
+ * *before* @buffer and *total becomes -1, so the caller must own that byte.
+ * The caller must also size @buffer for all values plus separators.
+ *
+ * Returns 0 on success, -1 when @buffer, @local or local->xattr_list is NULL.
+ */
+int32_t
+stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
+{
+        char                *cursor = buffer;
+        stripe_xattr_sort_t *entry  = NULL;
+        int32_t              chunk  = 0;
+        int32_t              idx    = 0;
+
+        if (buffer == NULL || local == NULL || local->xattr_list == NULL)
+                return -1;
+
+        for (idx = 0; idx < local->nallocs; idx++) {
+                entry = &local->xattr_list[idx];
+                chunk = entry->xattr_len;
+
+                if (chunk == 0 || entry->xattr_value == NULL)
+                        continue;
+
+                memcpy (cursor, entry->xattr_value, chunk);
+                cursor += chunk;
+                *cursor++ = ' ';
+        }
+
+        /* overwrite the trailing separator with the terminator */
+        *--cursor = '\0';
+        if (total != NULL)
+                *total = cursor - buffer;
+
+        return 0;
+}
+
+/*
+ * Free the xattr_value buffer of each of the local->nallocs entries in
+ * local->xattr_list. The list itself is not freed.
+ *
+ * Returns 0 on success, -1 when @local or local->xattr_list is NULL.
+ */
+int32_t
+stripe_free_xattr_str (stripe_local_t *local)
+{
+        int32_t idx = 0;
+
+        if (local == NULL || local->xattr_list == NULL)
+                return -1;
+
+        for (idx = 0; idx < local->nallocs; idx++) {
+                if (local->xattr_list[idx].xattr_value != NULL)
+                        GF_FREE (local->xattr_list[idx].xattr_value);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Merge the serialized lockinfo dictionaries stored in local->xattr_list
+ * into one dictionary and hand back its serialized form.
+ *
+ * Every entry with a non-zero length and a value is unserialized into a
+ * scratch dict and copied into the merged dict. On success 0 is returned
+ * and, if the merged dict has a positive serialized length, *xattr_serz
+ * receives a GF_CALLOC'ed buffer holding it; otherwise *xattr_serz is left
+ * untouched. Returns -1 on NULL @local/@xattr_serz or on any allocation or
+ * dict failure; nothing allocated here is left behind in that case.
+ */
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+                            void **xattr_serz)
+{
+        int32_t              ret = -1, i = 0, len = 0;
+        dict_t              *tmp1 = NULL, *tmp2 = NULL;
+        char                *buf = NULL;
+        stripe_xattr_sort_t *xattr = NULL;
+
+        if (xattr_serz == NULL || local == NULL) {
+                goto out;
+        }
+
+        tmp2 = dict_new ();
+
+        if (tmp2 == NULL) {
+                goto out;
+        }
+
+        for (i = 0; i < local->nallocs; i++) {
+                xattr = local->xattr_list + i;
+                len = xattr->xattr_len;
+
+                if (len && xattr->xattr_value) {
+                        /* reuse the scratch dict; a failed reset is only
+                         * worth a debug message */
+                        ret = dict_reset (tmp2);
+                        if (ret < 0) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "dict_reset failed (%s)",
+                                        strerror (-ret));
+                        }
+
+                        ret = dict_unserialize (xattr->xattr_value,
+                                                xattr->xattr_len,
+                                                &tmp2);
+                        if (ret < 0) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "dict_unserialize failed (%s)",
+                                        strerror (-ret));
+                                ret = -1;
+                                goto out;
+                        }
+
+                        tmp1 = dict_copy (tmp2, tmp1);
+                        if (tmp1 == NULL) {
+                                /* ret still holds the result of the
+                                 * unserialize above, so it is not logged */
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "dict_copy failed");
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+
+        len = dict_serialized_length (tmp1);
+        if (len > 0) {
+                buf = GF_CALLOC (1, len, gf_common_mt_dict_t);
+                if (buf == NULL) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_serialize (tmp1, buf);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "dict_serialize failed (%s)", strerror (-ret));
+                        /* buf was never handed to the caller */
+                        GF_FREE (buf);
+                        ret = -1;
+                        goto out;
+                }
+
+                *xattr_serz = buf;
+        }
+
+        ret = 0;
+out:
+        if (tmp1 != NULL) {
+                dict_unref (tmp1);
+        }
+
+        if (tmp2 != NULL) {
+                dict_unref (tmp2);
+        }
+
+        return ret;
+}
+
+
+/*
+ * Build the pathinfo string for this xlator:
+ *
+ *   (<STRIPE_PATHINFO_HEADER<name>:[<stripe-size>]> <child values...>)
+ *
+ * The child values come from stripe_xattr_aggregate(). On success 0 is
+ * returned and *xattr_serz points at a GF_CALLOC'ed string. On failure -1
+ * is returned and no buffer is left allocated.
+ *
+ * Side effect: local->xattr_total_len is increased by the header length
+ * plus 2 on every call, before the buffer is allocated.
+ */
+int32_t
+stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+                            char **xattr_serz)
+{
+        int      ret = -1;
+        int32_t  padding = 0;
+        int32_t  tlen = 0;
+        char     stripe_size_str[20] = {0,};
+        char    *pathinfo_serz = NULL;
+
+        if (!local || !xattr_serz) {
+                gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+                goto out;
+        }
+
+        /* the cast keeps the argument in step with the conversion
+         * specifier whatever the width of 'long' is */
+        (void) snprintf (stripe_size_str, sizeof (stripe_size_str),
+                         "%"PRId64,
+                         (int64_t) ((local->fctx) ?
+                                    local->fctx->stripe_size : 0));
+
+        /* extra bytes for decorations (brackets and <>'s) */
+        padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
+                  + strlen (stripe_size_str) + 7;
+        local->xattr_total_len += (padding + 2);
+
+        pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
+                                   gf_common_mt_char);
+        if (!pathinfo_serz)
+                goto out;
+
+        /* xlator info */
+        (void) sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ",
+                        this->name, stripe_size_str);
+
+        ret = stripe_xattr_aggregate (pathinfo_serz + padding, local, &tlen);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Cannot aggregate pathinfo list");
+                /* the buffer never reaches the caller on this path */
+                GF_FREE (pathinfo_serz);
+                goto out;
+        }
+
+        *(pathinfo_serz + padding + tlen) = ')';
+        *(pathinfo_serz + padding + tlen + 1) = '\0';
+
+        *xattr_serz = pathinfo_serz;
+
+        ret = 0;
+ out:
+        return ret;
+}
+
+/**
+ * stripe_get_matching_bs - Get the matching block size for the given path.
+ *
+ * Starts from priv->block_size and, while holding priv->lock, walks
+ * priv->pattern; the first entry whose path_pattern matches @path with
+ * fnmatch (FNM_NOESCAPE) supplies the block size instead. If either
+ * GF_VALIDATE_OR_GOTO check jumps to 'out', 0 is returned.
+ *
+ * Note: the block size is tracked as uint64_t but returned as int32_t.
+ */
+int32_t
+stripe_get_matching_bs (const char *path, stripe_private_t *priv)
+{
+        struct stripe_options *opt = NULL;
+        uint64_t               bs  = 0;
+
+        GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+        GF_VALIDATE_OR_GOTO ("stripe", path, out);
+
+        LOCK (&priv->lock);
+        {
+                bs = priv->block_size;
+                for (opt = priv->pattern; opt != NULL; opt = opt->next) {
+                        if (fnmatch (opt->path_pattern, path,
+                                     FNM_NOESCAPE) == 0) {
+                                bs = opt->block_size;
+                                break;
+                        }
+                }
+        }
+        UNLOCK (&priv->lock);
+
+out:
+        return bs;
+}
+
+/*
+ * Populate local->fctx from the stripe xattrs found in @dict
+ * (trusted.<name>.stripe-size, -count, -index and -coalesce).
+ *
+ * local->fctx and local->fctx->xl_array are allocated on first use. The
+ * child that answered (prev->this) is recorded in xl_array at the position
+ * given by the stripe-index xattr, unless that slot is already filled.
+ *
+ * Problems are reported through @local, not the return value:
+ *  - a missing size/count/index xattr, or a stripe-size that differs from
+ *    the one already recorded, sets local->xattr_self_heal_needed;
+ *  - allocation failures and inconsistent count/index values set
+ *    local->op_ret = -1 and local->op_errno (ENOMEM or EIO).
+ *
+ * Always returns 0.
+ */
+int32_t
+stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
+                   dict_t *dict)
+{
+        char              key[256] = {0,};
+        data_t           *data     = NULL;
+        int32_t           index    = 0;
+        stripe_private_t *priv     = NULL;
+
+        priv = this->private;
+
+        if (!local->fctx) {
+                local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
+                                         gf_stripe_mt_stripe_fd_ctx_t);
+                if (!local->fctx) {
+                        local->op_errno = ENOMEM;
+                        local->op_ret = -1;
+                        goto out;
+                }
+
+                local->fctx->static_array = 0;
+        }
+
+        /* Stripe block size */
+        snprintf (key, sizeof (key), "trusted.%s.stripe-size", this->name);
+        data = dict_get (dict, key);
+        if (!data) {
+                local->xattr_self_heal_needed = 1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get stripe-size");
+                goto out;
+        } else {
+                if (!local->fctx->stripe_size) {
+                        local->fctx->stripe_size =
+                                data_to_int64 (data);
+                }
+
+                if (local->fctx->stripe_size != data_to_int64 (data)) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "stripe-size mismatch in blocks");
+                        local->xattr_self_heal_needed = 1;
+                }
+        }
+
+        /* Stripe count */
+        snprintf (key, sizeof (key), "trusted.%s.stripe-count", this->name);
+        data = dict_get (dict, key);
+
+        if (!data) {
+                local->xattr_self_heal_needed = 1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get stripe-count");
+                goto out;
+        }
+        if (!local->fctx->xl_array) {
+                local->fctx->stripe_count = data_to_int32 (data);
+                if (!local->fctx->stripe_count) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "error with stripe-count xattr");
+                        local->op_ret = -1;
+                        local->op_errno = EIO;
+                        goto out;
+                }
+
+                local->fctx->xl_array = GF_CALLOC (local->fctx->stripe_count,
+                                                   sizeof (xlator_t *),
+                                                   gf_stripe_mt_xlator_t);
+
+                if (!local->fctx->xl_array) {
+                        local->op_errno = ENOMEM;
+                        local->op_ret = -1;
+                        goto out;
+                }
+        }
+        if (local->fctx->stripe_count != data_to_int32 (data)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error with stripe-count xattr (%d != %d)",
+                        local->fctx->stripe_count, data_to_int32 (data));
+                local->op_ret = -1;
+                local->op_errno = EIO;
+                goto out;
+        }
+
+        /* index */
+        snprintf (key, sizeof (key), "trusted.%s.stripe-index", this->name);
+        data = dict_get (dict, key);
+        if (!data) {
+                local->xattr_self_heal_needed = 1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get stripe-index");
+                goto out;
+        }
+        index = data_to_int32 (data);
+        /*
+         * The index comes from an on-disk xattr and is used below as a
+         * subscript into xl_array, so negative values and values at or
+         * beyond stripe_count (the number of slots allocated above) are
+         * rejected as well.
+         */
+        if (index < 0 || index > priv->child_count ||
+            index >= (int32_t) local->fctx->stripe_count) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error with stripe-index xattr (%d)", index);
+                local->op_ret = -1;
+                local->op_errno = EIO;
+                goto out;
+        }
+        if (local->fctx->xl_array) {
+                if (!local->fctx->xl_array[index])
+                        local->fctx->xl_array[index] = prev->this;
+        }
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-coalesce",
+                  this->name);
+        data = dict_get(dict, key);
+        if (!data) {
+                /*
+                 * The file was probably created prior to coalesce support.
+                 * Assume non-coalesce mode for this file to maintain backwards
+                 * compatibility.
+                 */
+                gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce "
+                       "attr, assume non-coalesce mode");
+                local->fctx->stripe_coalesce = 0;
+        } else {
+                local->fctx->stripe_coalesce = data_to_int32(data);
+        }
+
+
+out:
+        return 0;
+}
+
+/*
+ * Add the four stripe layout keys to an xattr request dict:
+ *
+ *   trusted.<name>.stripe-size      (int64)
+ *   trusted.<name>.stripe-count     (int32)
+ *   trusted.<name>.stripe-index     (int32)
+ *   trusted.<name>.stripe-coalesce  (int32)
+ *
+ * Stops at the first dict_set_* failure, logs it and returns that result.
+ * Returns 0 when all four keys were set. Key names are built with snprintf,
+ * so a very long xlator name truncates the key and cannot overrun the
+ * buffer.
+ */
+int32_t
+stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
+                            uint32_t stripe_count, uint32_t stripe_index,
+                            uint32_t stripe_coalesce)
+{
+        char    key[256] = {0,};
+        int32_t ret      = -1;
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-size", this->name);
+        ret = dict_set_int64 (dict, key, stripe_size);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set %s in xattr_req dict", key);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-count", this->name);
+        ret = dict_set_int32 (dict, key, stripe_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set %s in xattr_req dict", key);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-index", this->name);
+        ret = dict_set_int32 (dict, key, stripe_index);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set %s in xattr_req dict", key);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-coalesce",
+                  this->name);
+        ret = dict_set_int32(dict, key, stripe_coalesce);
+        if (ret) {
+                gf_log(this->name, GF_LOG_WARNING,
+                       "failed to set %s in xattr_req_dict", key);
+                goto out;
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Parse @num with gf_string2bytesize() straight into priv->block_size.
+ *
+ * Returns 0 on success. Returns -1 when a GF_VALIDATE_OR_GOTO check on
+ * THIS, @priv or @num jumps to 'out', or when @num cannot be parsed (the
+ * latter is logged at ERROR level).
+ */
+static int
+set_default_block_size (stripe_private_t *priv, char *num)
+{
+        int ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("stripe", THIS, out);
+        GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+        GF_VALIDATE_OR_GOTO (THIS->name, num, out);
+
+        if (gf_string2bytesize (num, &priv->block_size) == 0) {
+                ret = 0;
+        } else {
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "invalid number format \"%s\"", num);
+        }
+
+ out:
+        return ret;
+}
+
+
+/*
+ * Parse a block-size option string of comma separated "pattern:size"
+ * items (for example "*avi:10MB") and install the result in @priv.
+ *
+ * An item without a ':' is a bare size: it is stored as the default block
+ * size via set_default_block_size() and also recorded with pattern "*".
+ * Each size must be at least STRIPE_MIN_BLOCK_SIZE and a multiple of 512.
+ *
+ * @data is tokenized in place with strtok_r() and is therefore modified.
+ *
+ * Returns 0 when every item was accepted and -1 on the first failure
+ * (NULL argument, allocation failure, unparsable or invalid size, pattern
+ * too long for path_pattern). Items processed before the failing one stay
+ * installed in @priv.
+ */
+int
+set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+{
+        int                    ret = -1;
+        size_t                 pattern_len = 0;
+        char                  *tmp_str = NULL;
+        char                  *tmp_str1 = NULL;
+        char                  *dup_str = NULL;
+        char                  *stripe_str = NULL;
+        char                  *pattern = NULL;
+        char                  *num = NULL;
+        struct stripe_options *temp_stripeopt = NULL;
+        struct stripe_options *stripe_opt = NULL;
+
+        if (!this || !priv || !data)
+                goto out;
+
+        /* Get the pattern for striping.
+           "option block-size *avi:10MB" etc */
+        stripe_str = strtok_r (data, ",", &tmp_str);
+        while (stripe_str) {
+                dup_str = gf_strdup (stripe_str);
+                if (!dup_str)
+                        goto out;
+
+                stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options),
+                                        gf_stripe_mt_stripe_options);
+                if (!stripe_opt) {
+                        goto out;
+                }
+
+                pattern = strtok_r (dup_str, ":", &tmp_str1);
+                num = strtok_r (NULL, ":", &tmp_str1);
+                if (!num) {
+                        num = pattern;
+                        pattern = "*";
+                        /* 'ret' is deliberately not reused here: it must
+                         * stay -1 so the validation failures below are
+                         * reported to the caller */
+                        if (set_default_block_size (priv, num))
+                                goto out;
+                }
+                if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number format \"%s\"", num);
+                        goto out;
+                }
+
+                if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
+                        gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
+                                "%s. Should be atleast %llu bytes", num,
+                                STRIPE_MIN_BLOCK_SIZE);
+                        goto out;
+                }
+                if (stripe_opt->block_size % 512) {
+                        gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
+                                " be a multiple of 512 bytes", num);
+                        goto out;
+                }
+
+                /* path_pattern is zero-filled by GF_CALLOC; leaving at
+                 * least one byte untouched keeps it NUL-terminated */
+                pattern_len = strlen (pattern);
+                if (pattern_len >= sizeof (stripe_opt->path_pattern)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "pattern \"%s\" is too long", pattern);
+                        goto out;
+                }
+                memcpy (stripe_opt->path_pattern, pattern, pattern_len);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "block-size : pattern %s : size %"PRId64,
+                        stripe_opt->path_pattern, stripe_opt->block_size);
+
+                /*
+                 * NOTE(review): both branches leave temp_stripeopt NULL, so
+                 * the new entry always replaces the list head and earlier
+                 * entries are dropped without being freed. Kept as is; the
+                 * intended linking (append vs. replace on reconfigure)
+                 * needs confirming against the callers.
+                 */
+                if (priv->pattern)
+                        temp_stripeopt = NULL;
+                else
+                        temp_stripeopt = priv->pattern;
+
+                stripe_opt->next = temp_stripeopt;
+
+                priv->pattern = stripe_opt;
+                stripe_opt = NULL;
+
+                GF_FREE (dup_str);
+                dup_str = NULL;
+
+                stripe_str = strtok_r (NULL, ",", &tmp_str);
+        }
+
+        ret = 0;
+out:
+
+        GF_FREE (dup_str);
+
+        GF_FREE (stripe_opt);
+
+        return ret;
+}
+
+/*
+ * Raise ia_size, ia_mtime, ia_ctime and ia_atime of @to to the
+ * corresponding value of @from wherever @from's value is larger.
+ * Always returns 0.
+ */
+int32_t
+stripe_iatt_merge (struct iatt *from, struct iatt *to)
+{
+        to->ia_size  = (from->ia_size > to->ia_size) ?
+                       from->ia_size : to->ia_size;
+        to->ia_mtime = (from->ia_mtime > to->ia_mtime) ?
+                       from->ia_mtime : to->ia_mtime;
+        to->ia_ctime = (from->ia_ctime > to->ia_ctime) ?
+                       from->ia_ctime : to->ia_ctime;
+        to->ia_atime = (from->ia_atime > to->ia_atime) ?
+                       from->ia_atime : to->ia_atime;
+
+        return 0;
+}
+
+/*
+ * Convert an offset in the striped file into the offset used on a single
+ * child:
+ *
+ *   (offset / (stripe_size * stripe_count)) * stripe_size
+ *       + offset % stripe_size
+ *
+ * The line size is kept in 64 bits so the product is not truncated where
+ * size_t is 32 bits wide. @stripe_size and @stripe_count must be non-zero,
+ * since both are used as divisors.
+ */
+off_t
+coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
+{
+        uint64_t line_size  = 0;
+        uint64_t stripe_num = 0;
+
+        line_size = stripe_size * stripe_count;
+        stripe_num = offset / line_size;
+
+        return (stripe_num * stripe_size) + (offset % stripe_size);
+}
+
+/*
+ * Estimate the size of the striped file from the size of the local file
+ * held by the stripe member @stripe_index.
+ *
+ * Each full stripe_size chunk of the local file stands for one complete
+ * stripe line (stripe_count chunks). A size of 0 is returned unchanged.
+ * @stripe_size must be non-zero because it is used as a divisor.
+ */
+off_t
+uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+                 int stripe_index)
+{
+        uint64_t chunks = 0;
+        uint64_t tail   = 0;
+
+        if (size == 0)
+                return 0;
+
+        chunks = (size / stripe_size) * stripe_count;
+        tail = size % stripe_size;
+
+        if (tail != 0) {
+                /*
+                 * A partial chunk means this member owns the end of the
+                 * file: count the members preceding it in the last line
+                 * and add the remainder for the exact size.
+                 */
+                chunks += stripe_index;
+                return chunks * stripe_size + tail;
+        }
+
+        /*
+         * No remainder: the chunk count may overestimate the file. Drop
+         * the chunks of the members that follow this one and let those
+         * members report a larger size if they have one.
+         */
+        chunks -= stripe_count - (stripe_index + 1);
+
+        return chunks * stripe_size;
+}
+
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
index 29c95c257..e9ac9cf46 100644
--- a/xlators/cluster/stripe/src/stripe-mem-types.h
+++ b/xlators/cluster/stripe/src/stripe-mem-types.h
@@ -1,21 +1,11 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,12 +15,12 @@
#include "mem-types.h"
enum gf_stripe_mem_types_ {
- gf_stripe_mt_stripe_local_t = gf_common_mt_end + 1,
- gf_stripe_mt_iovec,
- gf_stripe_mt_readv_replies,
+ gf_stripe_mt_iovec = gf_common_mt_end + 1,
+ gf_stripe_mt_stripe_replies,
gf_stripe_mt_stripe_fd_ctx_t,
gf_stripe_mt_char,
gf_stripe_mt_int8_t,
+ gf_stripe_mt_int32_t,
gf_stripe_mt_xlator_t,
gf_stripe_mt_stripe_private_t,
gf_stripe_mt_stripe_options,
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index 529111ba7..69b510e23 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -1,25 +1,16 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/**
* xlators/cluster/stripe:
- * Stripe translator, stripes the data accross its child nodes,
+ * Stripe translator, stripes the data across its child nodes,
* as per the options given in the volfile. The striping works
* fairly simple. It writes files at different offset as per
* calculation. So, 'ls -l' output at the real posix level will
@@ -32,6 +23,7 @@
* very much necessary, or else, use it in combination with AFR, to have a
* backup copy.
*/
+#include <fnmatch.h>
#include "stripe.h"
#include "libxlator.h"
@@ -40,73 +32,10 @@
struct volume_options options[];
-void
-stripe_local_wipe (stripe_local_t *local)
-{
- if (!local)
- goto out;
-
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->inode)
- inode_unref (local->inode);
-
- if (local->xattr)
- dict_unref (local->xattr);
-
- if (local->dict)
- dict_unref (local->dict);
-
-out:
- return;
-}
-
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs (const char *path, struct stripe_options *opts,
- uint64_t default_bs)
-{
- struct stripe_options *trav = NULL;
- char *pathname = NULL;
- uint64_t block_size = 0;
-
- block_size = default_bs;
-
- if (!path || !opts)
- goto out;
-
- /* FIXME: is a strdup really necessary? */
- pathname = gf_strdup (path);
- if (!pathname)
- goto out;
-
- trav = opts;
- while (trav) {
- if (!fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
-
- GF_FREE (pathname);
-
-out:
- return block_size;
-}
-
-
-
int32_t
stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
int callcnt = -1;
stripe_local_t *local = NULL;
@@ -135,7 +64,7 @@ int32_t
stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -150,7 +79,7 @@ stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, stripe_sh_chown_cbk, prev->this,
prev->this->fops->setattr, &local->loc,
- &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID));
+ &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
out:
return 0;
@@ -164,7 +93,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
stripe_private_t *priv = NULL;
- dict_t *dict = NULL;
+ dict_t *xdata = NULL;
int ret = 0;
if (!local || !this || !frame) {
@@ -182,8 +111,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
if (!rframe) {
goto out;
}
- rlocal = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
goto out;
}
@@ -192,14 +120,14 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
loc_copy (&rlocal->loc, &local->loc);
memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt));
- dict = dict_new ();
- if (!dict)
+ xdata = dict_new ();
+ if (!xdata)
goto out;
- ret = dict_set_static_bin (dict, "gfid-req", local->stbuf.ia_gfid, 16);
+ ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16);
if (ret)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set gfid-req");
+ "%s: failed to set gfid-req", local->loc.path);
while (trav) {
if (IA_ISREG (local->stbuf.ia_type)) {
@@ -207,101 +135,43 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
trav->xlator, trav->xlator->fops->mknod,
&local->loc,
st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type), 0,
- dict);
+ local->stbuf.ia_type),
+ 0, 0, xdata);
}
if (IA_ISDIR (local->stbuf.ia_type)) {
STACK_WIND (rframe, stripe_sh_make_entry_cbk,
trav->xlator, trav->xlator->fops->mkdir,
- &local->loc, st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type),
- dict);
+ &local->loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, xdata);
}
trav = trav->next;
}
- if (dict)
- dict_unref (dict);
+ if (xdata)
+ dict_unref (xdata);
return 0;
out:
if (rframe)
STRIPE_STACK_DESTROY (rframe);
- if (dict)
- dict_unref (dict);
+ if (xdata)
+ dict_unref (xdata);
return 0;
}
-void
-stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
-
- dst = data;
-
- if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin (dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC (1, sizeof (int64_t),
- gf_common_mt_char);
- if (size == NULL) {
- gf_log ("stripe", GF_LOG_WARNING,
- "memory allocation failed");
- goto out;
- }
- ret = dict_set_bin (dst, key, size, sizeof (int64_t));
- if (ret < 0) {
- gf_log ("stripe", GF_LOG_WARNING,
- "stripe aggregate dict set failed");
- GF_FREE (size);
- goto out;
- }
- }
-
- ptr = data_to_bin (value);
- if (ptr == NULL) {
- gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
- goto out;
- }
-
- *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
- } else if (strcmp (key, GF_CONTENT_KEY)) {
- /* No need to aggregate 'CONTENT' data */
- ret = dict_set (dst, key, value);
- if (ret)
- gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
- }
-
-out:
- return;
-}
-
-
-void
-stripe_aggregate_xattr (dict_t *dst, dict_t *src)
-{
- if ((dst == NULL) || (src == NULL)) {
- goto out;
- }
-
- dict_foreach (src, stripe_aggregate, dst);
-out:
- return;
-}
-
-
int32_t
stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -332,30 +202,42 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG (buf->ia_type)) {
+ ret = stripe_ctx_handle (this, prev, local,
+ xdata);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error getting fctx info from"
+ " dict");
+ }
if (FIRST_CHILD(this) == prev->this) {
local->stbuf = *buf;
local->postparent = *postparent;
local->inode = inode_ref (inode);
- local->dict = dict_ref (dict);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
if (local->xattr) {
- stripe_aggregate_xattr (local->dict,
+ stripe_aggregate_xattr (local->xdata,
local->xattr);
dict_unref (local->xattr);
local->xattr = NULL;
}
}
- if (!local->dict && !local->xattr) {
- local->xattr = dict_ref (dict);
- } else if (local->dict) {
- stripe_aggregate_xattr (local->dict, dict);
+
+ if (!local->xdata && !local->xattr) {
+ local->xattr = dict_ref (xdata);
+ } else if (local->xdata) {
+ stripe_aggregate_xattr (local->xdata, xdata);
} else if (local->xattr) {
- stripe_aggregate_xattr (local->xattr, dict);
+ stripe_aggregate_xattr (local->xattr, xdata);
}
local->stbuf_blocks += buf->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->postparent_size < postparent->ia_size)
@@ -375,7 +257,8 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&frame->lock);
if (!callcnt) {
- if (local->op_ret == 0 && local->entry_self_heal_needed)
+ if (local->op_ret == 0 && local->entry_self_heal_needed &&
+ !uuid_is_null (local->loc.inode->gfid))
stripe_entry_self_heal (frame, this, local);
if (local->failed)
@@ -386,11 +269,13 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->postparent.ia_blocks = local->postparent_blocks;
local->postparent.ia_size = local->postparent_size;
+ inode_ctx_put (local->inode, this,
+ (uint64_t) (long)local->fctx);
}
STRIPE_STACK_UNWIND (lookup, frame, local->op_ret,
local->op_errno, local->inode,
- &local->stbuf, local->dict,
+ &local->stbuf, local->xdata,
&local->postparent);
}
out:
@@ -399,14 +284,15 @@ out:
int32_t
stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
int32_t op_errno = EINVAL;
int64_t filesize = 0;
- int ret = 0;
+ int ret = 0;
+ uint64_t tmpctx = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -418,8 +304,7 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -428,10 +313,37 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
frame->local = local;
loc_copy (&local->loc, loc);
- if (xattr_req && dict_get (xattr_req, GF_CONTENT_KEY)) {
- ret = dict_get_int64 (xattr_req, GF_CONTENT_KEY, &filesize);
+ inode_ctx_get (local->inode, this, &tmpctx);
+ if (tmpctx)
+ local->fctx = (stripe_fd_ctx_t*) (long)tmpctx;
+
+ /* quick-read friendly changes */
+ if (xdata && dict_get (xdata, GF_CONTENT_KEY)) {
+ ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize);
if (!ret && (filesize > priv->block_size))
- dict_del (xattr_req, GF_CONTENT_KEY);
+ dict_del (xdata, GF_CONTENT_KEY);
+ }
+
+ /* get stripe-size xattr on lookup. This would be required for
+ * open/read/write/pathinfo calls. Hence we send down the request
+ * even when type == IA_INVAL */
+
+ /*
+ * We aren't guaranteed to have xdata here. We need the format info for
+ * the file, so allocate xdata if necessary.
+ */
+ if (!xdata)
+ xdata = dict_new();
+ else
+ xdata = dict_ref(xdata);
+
+ if (xdata && (IA_ISREG (loc->inode->ia_type) ||
+ (loc->inode->ia_type == IA_INVAL))) {
+ ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);
+ if (ret)
+ gf_log (this->name , GF_LOG_ERROR, "Failed to build"
+ " xattr request for %s", loc->path);
+
}
/* Everytime in stripe lookup, all child nodes
@@ -439,11 +351,12 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup,
- loc, xattr_req);
+ trav->xlator->fops->lookup, loc, xdata);
trav = trav->next;
}
+ dict_unref(xdata);
+
return 0;
err:
STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
@@ -453,7 +366,7 @@ err:
int32_t
stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -488,6 +401,9 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -504,18 +420,19 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STRIPE_STACK_UNWIND (stat, frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ local->op_errno, &local->stbuf, NULL);
}
out:
return 0;
}
int32_t
-stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -533,8 +450,7 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -543,23 +459,30 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_stat_cbk, trav->xlator,
- trav->xlator->fops->stat, loc);
+ trav->xlator->fops->stat, loc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata)
{
stripe_local_t *local = NULL;
int32_t callcnt = 0;
@@ -597,14 +520,14 @@ stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
STRIPE_STACK_UNWIND (statfs, frame, local->op_ret,
- local->op_errno, &local->statvfs_buf);
+ local->op_errno, &local->statvfs_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -619,8 +542,7 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
priv = this->private;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -632,13 +554,13 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc);
+ trav->xlator->fops->statfs, loc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -647,7 +569,7 @@ err:
int32_t
stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -685,6 +607,9 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -707,19 +632,21 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (truncate, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -728,7 +655,6 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
@@ -736,8 +662,7 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -746,15 +671,55 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->truncate, loc, offset);
- trav = trav->next;
- }
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no xlator at index %d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ /*
+ * The node that owns EOF is truncated to the exact
+ * coalesced offset. Nodes prior to this index should
+ * be rounded up to the size of the complete stripe,
+ * while nodes after this index should be rounded down
+ * to the size of the previous stripe.
+ */
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->truncate, loc, dest_offset,
+ NULL);
+ }
return 0;
err:
- STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -762,7 +727,7 @@ err:
int32_t
stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -801,6 +766,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += preop->ia_blocks;
local->postbuf_blocks += postop->ia_blocks;
+ correct_file_size(preop, local->fctx, prev);
+ correct_file_size(postop, local->fctx, prev);
+
if (local->prebuf_size < preop->ia_size)
local->prebuf_size = preop->ia_size;
if (local->postbuf_size < postop->ia_size)
@@ -822,7 +790,7 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (setattr, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
@@ -831,11 +799,12 @@ out:
int32_t
stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -853,33 +822,47 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
- local->call_count = priv->child_count;
+ if (!IA_ISDIR (loc->inode->ia_type) &&
+ !IA_ISREG (loc->inode->ia_type)) {
+ local->call_count = 1;
+ STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, NULL);
+ return 0;
+ }
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
+ local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk,
trav->xlator, trav->xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -895,8 +878,7 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -907,13 +889,13 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->fsetattr, fd, stbuf, valid);
+ trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -921,7 +903,8 @@ int32_t
stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -958,6 +941,8 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->pre_buf.ia_blocks += prenewparent->ia_blocks;
local->post_buf.ia_blocks += postnewparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf.ia_size < buf->ia_size)
local->stbuf.ia_size = buf->ia_size;
@@ -983,7 +968,7 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preparent,
&local->postparent, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
@@ -993,7 +978,8 @@ int32_t
stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -1024,24 +1010,25 @@ stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
while (trav) {
STACK_WIND (frame, stripe_stack_rename_cbk,
trav->xlator, trav->xlator->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
trav = trav->next;
}
return 0;
unwind:
STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
+ postoldparent, prenewparent, postnewparent, NULL);
return 0;
}
int32_t
stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -1061,8 +1048,7 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1073,24 +1059,67 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
local->call_count = priv->child_count;
+ if (IA_ISREG(oldloc->inode->ia_type)) {
+ inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
frame->local = local;
STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc);
+ trav->xlator->fops->rename, oldloc, newloc, NULL);
return 0;
err:
STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
+/*
+ * Completion handler for the unlink on the first child; stripe_unlink_cbk
+ * winds FIRST_CHILD's unlink with this callback.
+ *
+ * If this, frame, frame->local or cookie is missing, or the child replied
+ * with op_ret == -1, the unlink is unwound with -1 and the op_errno that was
+ * received.  Otherwise the parent iatts are copied into the local and the
+ * unlink is unwound with op_ret 0, local->op_errno and the child's xdata.
+ */
+int32_t
+stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno,
+                         struct iatt *preparent, struct iatt *postparent,
+                         dict_t *xdata)
+{
+        stripe_local_t *local       = NULL;
+        call_frame_t   *child_frame = NULL;
+        int             succeeded   = 0;
+
+        if (this && frame && frame->local && cookie) {
+                child_frame = cookie;
+                local       = frame->local;
+
+                if (op_ret != -1)
+                        succeeded = 1;
+                else
+                        gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+                                child_frame->this->name, strerror (op_errno));
+        } else {
+                gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+        }
+
+        if (!succeeded) {
+                /* both failure flavours report the errno handed to us */
+                STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+        local->op_ret             = 0;
+        local->preparent          = *preparent;
+        local->postparent         = *postparent;
+        local->preparent_blocks  += preparent->ia_blocks;
+        local->postparent_blocks += postparent->ia_blocks;
+
+        STRIPE_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+                             &local->preparent, &local->postparent, xdata);
+        return 0;
+}
+
int32_t
stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1112,49 +1141,33 @@ stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
prev->this->name, strerror (op_errno));
local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
+ if (op_errno != ENOENT) {
local->failed = 1;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (FIRST_CHILD(this) == prev->this) {
- local->preparent = *preparent;
- local->postparent = *postparent;
+ local->op_ret = op_ret;
}
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
-
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
}
}
UNLOCK (&frame->lock);
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
+ if (callcnt == 1) {
+ if (local->failed) {
+ op_errno = local->op_errno;
+ goto out;
}
-
- STRIPE_STACK_UNWIND (unlink, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent);
+ STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->unlink, &local->loc,
+ local->xflag, local->xdata);
}
+ return 0;
out:
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
int32_t
-stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1182,26 +1195,32 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+
frame->local = local;
local->call_count = priv->child_count;
+ trav = trav->next; /* Skip the first child */
while (trav) {
STACK_WIND (frame, stripe_unlink_cbk,
trav->xlator, trav->xlator->fops->unlink,
- loc);
+ loc, xflag, xdata);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1209,10 +1228,8 @@ err:
int32_t
stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent)
-
+ struct iatt *postparent, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
if (!this || !frame || !frame->local) {
@@ -1225,11 +1242,10 @@ stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
- trav = this->children;
local = frame->local;
+ local->op_ret = 0;
local->call_count--; /* First child successful */
- trav = trav->next; /* Skip first child */
local->preparent = *preparent;
local->postparent = *postparent;
@@ -1238,22 +1254,60 @@ stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
- while (trav) {
- STACK_WIND (frame, stripe_unlink_cbk, trav->xlator,
- trav->xlator->fops->rmdir, &local->loc,
- local->flags);
- trav = trav->next;
- }
-
+ STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        /*
+         * rmdir callback used for the children that stripe_rmdir winds to
+         * (it skips the first child).  Each reply decrements call_count; a
+         * failure other than ENOENT marks the operation as failed.  When the
+         * count drops to 1 the rmdir is either wound to FIRST_CHILD (with
+         * stripe_first_rmdir_cbk) or, if any reply failed, unwound with -1.
+         *
+         * preparent, postparent and xdata are not used here.
+         */
+        int32_t         callcnt = 0;
+        stripe_local_t *local   = NULL;
+        call_frame_t   *prev    = NULL;
+
+        if (!this || !frame || !frame->local || !cookie) {
+                gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+                goto out;
+        }
+
+        prev  = cookie;
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+                                prev->this->name, strerror (op_errno));
+                        if (op_errno != ENOENT) {
+                                local->failed = 1;
+                                /*
+                                 * Keep the errno of the failure: the reply
+                                 * that brings the count to 1 may itself have
+                                 * succeeded or returned ENOENT.
+                                 */
+                                local->op_errno = op_errno;
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (callcnt == 1) {
+                if (local->failed) {
+                        /* report the recorded failure, as stripe_unlink_cbk does */
+                        op_errno = local->op_errno;
+                        goto out;
+                }
+                STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->rmdir, &local->loc,
+                            local->flags, NULL);
+        }
+        return 0;
+out:
+        STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+int32_t
+stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1276,8 +1330,7 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1287,13 +1340,17 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
loc_copy (&local->loc, loc);
local->flags = flags;
local->call_count = priv->child_count;
+ trav = trav->next; /* skip the first child */
- STACK_WIND (frame, stripe_first_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc, flags);
+ while (trav) {
+ STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator,
+ trav->xlator->fops->rmdir, loc, flags, NULL);
+ trav = trav->next;
+ }
return 0;
err:
- STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1302,7 +1359,7 @@ int32_t
stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1323,7 +1380,7 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
if (!callcnt) {
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1335,7 +1392,7 @@ out:
int32_t
stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1374,7 +1431,7 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
@@ -1382,7 +1439,7 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1392,7 +1449,7 @@ int32_t
stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1406,7 +1463,7 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
prev = cookie;
- priv = this->private;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
@@ -1422,25 +1479,24 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->failed = 1;
local->op_errno = op_errno;
}
-
if (op_ret >= 0) {
local->op_ret = op_ret;
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
-
/* Can be used as a mechanism to understand if mknod
was successful in at least one place */
if (uuid_is_null (local->ia_gfid))
uuid_copy (local->ia_gfid, buf->ia_gfid);
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict");
+
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -1465,7 +1521,7 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
@@ -1479,13 +1535,13 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent.ia_size = local->postparent_size;
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- }
+ inode_ctx_put (local->inode, this,
+ (uint64_t)(long) local->fctx);
- /* Create itself has failed.. so return
- without setxattring */
+ }
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1493,29 +1549,118 @@ out:
int32_t
+stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                              int32_t op_ret, int32_t op_errno, inode_t *inode,
+                              struct iatt *buf, struct iatt *preparent,
+                              struct iatt *postparent, dict_t *xdata)
+{
+        /*
+         * mknod callback for the first child (stripe_mknod winds the first
+         * child's mknod with it).  On success it records the reply in the
+         * local and winds mknod to every remaining child, completion being
+         * stripe_mknod_ifreg_cbk.  On failure, or when the sanity check on
+         * the arguments fails, mknod is unwound as an error with NULL
+         * payload.
+         *
+         * inode and xdata are not used here.
+         */
+        stripe_local_t   *local      = NULL;
+        stripe_private_t *priv       = NULL;
+        call_frame_t     *prev       = NULL;
+        xlator_list_t    *trav       = NULL;
+        int               i          = 1; /* index given to the next child;
+                                             stripe_mknod used 0 for the first */
+        dict_t           *dict       = NULL;
+        int               ret        = 0;
+        int               need_unref = 0;
+
+        if (!this || !frame || !frame->local || !cookie) {
+                gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+                /*
+                 * The unwind below carries no inode/iatt, so it must not be
+                 * reported with the child's success code.
+                 */
+                if (op_ret != -1) {
+                        op_ret   = -1;
+                        op_errno = EINVAL;
+                }
+                goto out;
+        }
+
+        prev  = cookie;
+        priv  = this->private;
+        local = frame->local;
+        trav  = this->children;
+
+        local->call_count--;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+                        prev->this->name, strerror (op_errno));
+                local->failed = 1;
+                local->op_errno = op_errno;
+                goto out;
+        }
+
+        local->op_ret = op_ret;
+
+        local->stbuf      = *buf;
+        local->preparent  = *preparent;
+        local->postparent = *postparent;
+
+        if (uuid_is_null (local->ia_gfid))
+                uuid_copy (local->ia_gfid, buf->ia_gfid);
+
+        /*
+         * NOTE(review): the size/blocks fields just copied from the reply
+         * are overwritten with the local's running totals, and nothing in
+         * this callback adds the first child's values to those totals;
+         * confirm this is intended.
+         */
+        local->preparent.ia_blocks  = local->preparent_blocks;
+        local->preparent.ia_size    = local->preparent_size;
+        local->postparent.ia_blocks = local->postparent_blocks;
+        local->postparent.ia_size   = local->postparent_size;
+        local->stbuf.ia_size        = local->stbuf_size;
+        local->stbuf.ia_blocks      = local->stbuf_blocks;
+
+        trav = trav->next; /* the first child has already been handled */
+        while (trav) {
+                /* default: pass the saved request dict through untouched */
+                dict       = local->xattr;
+                need_unref = 0;
+
+                if (priv->xattr_supported) {
+                        dict = dict_new ();
+                        if (dict) {
+                                need_unref = 1;
+
+                                dict_copy (local->xattr, dict);
+
+                                ret = stripe_xattr_request_build (this, dict,
+                                                        local->stripe_size,
+                                                        priv->child_count, i,
+                                                        priv->coalesce);
+                                if (ret)
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "Failed to build xattr request");
+                        } else {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "failed to allocate dict %s",
+                                        local->loc.path);
+                                /*
+                                 * Do not hand a NULL destination to
+                                 * dict_copy or the request builder; still
+                                 * wind (call_count expects a reply from this
+                                 * child) using the saved request dict.
+                                 */
+                                dict = local->xattr;
+                        }
+                }
+
+                STACK_WIND (frame, stripe_mknod_ifreg_cbk,
+                            trav->xlator, trav->xlator->fops->mknod,
+                            &local->loc, local->mode, local->rdev, 0, dict);
+                trav = trav->next;
+                i++;
+
+                /* only a dict created in this iteration is ours to release */
+                if (need_unref)
+                        dict_unref (dict);
+        }
+
+        return 0;
+
+out:
+
+        STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * mknod callback that relays the child's reply unchanged: op_ret, op_errno,
+ * inode, buf, preparent, postparent and xdata are passed straight to the
+ * mknod unwind.  cookie and this are not used.  stripe_mknod installs it on
+ * the path where mknod is wound to FIRST_CHILD only.
+ */
+int32_t
stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *params)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
int32_t op_errno = EINVAL;
int32_t i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
int ret = 0;
int need_unref = 0;
@@ -1527,7 +1672,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
@@ -1546,86 +1690,63 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
+ local->xattr = dict_copy_with_ref (xdata, NULL);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
/* Everytime in stripe lookup, all child nodes should
be looked up */
local->call_count = priv->child_count;
- /* Send a setxattr request to nodes where the
- files are created */
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
-
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate dict %s", loc->path);
- }
- need_unref = 1;
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
- dict_copy (params, dict);
+ dict_copy (xdata, dict);
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", loc->path);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed",
- loc->path);
- ret = dict_set_int32 (dict, index_key, i);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed",
- loc->path);
- } else {
- dict = params;
- }
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = xdata;
+ }
- STACK_WIND (frame, stripe_mknod_ifreg_cbk,
- trav->xlator, trav->xlator->fops->mknod,
- loc, mode, rdev, dict);
- trav = trav->next;
- i++;
+ STACK_WIND (frame, stripe_mknod_first_ifreg_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+ loc, mode, rdev, umask, dict);
if (dict && need_unref)
dict_unref (dict);
- }
-
- /* This case is handled, no need to continue further. */
return 0;
}
STACK_WIND (frame, stripe_single_mknod_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, params);
+ loc, mode, rdev, umask, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1634,7 +1755,7 @@ int32_t
stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1665,12 +1786,6 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
- if (FIRST_CHILD(this) == prev->this) {
- local->inode = inode_ref (inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
- }
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
@@ -1686,10 +1801,7 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&frame->lock);
if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
+ if (local->failed != -1) {
local->preparent.ia_blocks = local->preparent_blocks;
local->preparent.ia_size = local->preparent_size;
local->postparent.ia_blocks = local->postparent_blocks;
@@ -1700,16 +1812,76 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
+ }
+out:
+ return 0;
+}
+
+/*
+ * mkdir callback for the first child; stripe_mkdir winds the first child's
+ * mkdir with this callback.
+ *
+ * call_count is decremented for the first child's reply.  If the sanity
+ * check fails or op_ret is -1, mkdir is unwound with -1 and the op_errno
+ * that was received.  Otherwise a reference is taken on the inode, the
+ * iatts are copied into the local, the block totals are increased and the
+ * size fields set from them, and mkdir is wound to every remaining child
+ * (completion: stripe_mkdir_cbk) with the loc, mode, umask and xdata stored
+ * in the local.
+ */
+int32_t
+stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ trav = this->children;
+
+ local->call_count--; /* first child has replied; its result is checked below */
+ trav = trav->next; /* skip first child */
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->op_ret = 0;
+
+ local->inode = inode_ref (inode);
+ local->stbuf = *buf;
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ local->stbuf_size = buf->ia_size; /* sizes are assigned here, not accumulated */
+ local->preparent_size = preparent->ia_size;
+ local->postparent_size = postparent->ia_size;
+
+ while (trav) { /* wind mkdir to every child after the first */
+ STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator,
+ trav->xlator->fops->mkdir, &local->loc, local->mode,
+ local->umask, local->xdata);
+ trav = trav->next;
+ }
+ return 0;
out:
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
return 0;
+
}
int
stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
@@ -1731,27 +1903,27 @@ stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->call_count = priv->child_count;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ local->mode = mode;
+ local->umask = umask;
+ loc_copy (&local->loc, loc);
frame->local = local;
/* Everytime in stripe lookup, all child nodes should be looked up */
- while (trav) {
- STACK_WIND (frame, stripe_mkdir_cbk,
- trav->xlator, trav->xlator->fops->mkdir,
- loc, mode, params);
- trav = trav->next;
- }
+ STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator,
+ trav->xlator->fops->mkdir, loc, mode, umask, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1760,11 +1932,12 @@ int32_t
stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -1791,6 +1964,16 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG(inode->ia_type)) {
+ inode_ctx_get(inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get stripe context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+ }
+
if (FIRST_CHILD(this) == prev->this) {
local->inode = inode_ref (inode);
local->stbuf = *buf;
@@ -1801,6 +1984,8 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -1826,14 +2011,14 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (link, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
}
out:
return 0;
}
int32_t
-stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1856,8 +2041,7 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1871,13 +2055,13 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
while (trav) {
STACK_WIND (frame, stripe_link_cbk,
trav->xlator, trav->xlator->fops->link,
- oldloc, newloc);
+ oldloc, newloc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1885,7 +2069,7 @@ int32_t
stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1906,7 +2090,7 @@ stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
if (!callcnt) {
STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno,
local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1917,12 +2101,11 @@ int32_t
stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
@@ -1943,26 +2126,26 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
prev->this->name, strerror (op_errno));
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
+ local->failed = 1;
local->op_errno = op_errno;
}
if (op_ret >= 0) {
+ if (IA_ISREG(buf->ia_type)) {
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from "
+ "dict");
+ }
+
local->op_ret = op_ret;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -1985,7 +2168,7 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_create_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
@@ -2000,29 +2183,19 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!fctx) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
-
- fctx->stripe_size = local->stripe_size;
- fctx->stripe_count = priv->child_count;
- fctx->static_array = 1;
- fctx->xl_array = priv->xl_array;
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)fctx);
+ stripe_copy_xl_array(local->fctx->xl_array,
+ priv->xl_array,
+ local->fctx->stripe_count);
+ inode_ctx_put(local->inode, this,
+ (uint64_t) local->fctx);
}
- unwind:
/* Create itself has failed.. so return
without setxattring */
STRIPE_STACK_UNWIND (create, frame, local->op_ret,
local->op_errno, local->fd,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
@@ -2030,6 +2203,122 @@ out:
}
+
+int32_t
+stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ loc_t *loc = NULL;
+ int32_t need_unref = 0;
+ int32_t ret = -1;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ priv = this->private;
+ local = frame->local;
+ trav = this->children;
+ loc = &local->loc;
+
+ --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
+ local->op_errno = op_errno;
+ }
+
+ local->op_ret = 0;
+ /* Get the mapping in inode private */
+ /* Get the stat buf right */
+ local->stbuf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret == -1) {
+ local->call_count = 1;
+ STACK_WIND (frame, stripe_create_fail_unlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+ &local->loc, 0, NULL);
+ return 0;
+ }
+
+ if (local->op_ret >= 0) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
+
+ /* Send a setxattr request to nodes where the
+ files are created */
+ trav = trav->next;
+ while (trav) {
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
+
+ dict_copy (local->xattr, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = local->xattr;
+ }
+
+ STACK_WIND (frame, stripe_create_cbk, trav->xlator,
+ trav->xlator->fops->create, &local->loc,
+ local->flags, local->mode, local->umask, local->fd,
+ dict);
+ trav = trav->next;
+ if (need_unref && dict)
+ dict_unref (dict);
+ i++;
+ }
+
+out:
+ return 0;
+}
+
+
+
/**
* stripe_create - If a block-size is specified for the 'name', create the
* file in all the child nodes. If not, create it in only first child.
@@ -2038,18 +2327,14 @@ out:
*/
int32_t
stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
int32_t op_errno = EINVAL;
int ret = 0;
int need_unref = 0;
int i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
VALIDATE_OR_GOTO (frame, err);
@@ -2071,82 +2356,68 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
local->fd = fd_ref (fd);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ if (xdata)
+ local->xattr = dict_ref (xdata);
local->call_count = priv->child_count;
/* Send a setxattr request to nodes where the
files are created */
- sprintf (size_key, "trusted.%s.stripe-size", this->name);
- sprintf (count_key, "trusted.%s.stripe-count", this->name);
- sprintf (index_key, "trusted.%s.stripe-index", this->name);
- trav = this->children;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate dict %s", loc->path);
- }
- need_unref = 1;
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
- dict_copy (params, dict);
+ dict_copy (xdata, dict);
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", loc->path);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed",
- loc->path);
- ret = dict_set_int32 (dict, index_key, i);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed",
- loc->path);
- } else {
- dict = params;
- }
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = xdata;
+ }
- STACK_WIND (frame, stripe_create_cbk, trav->xlator,
- trav->xlator->fops->create, loc, flags,
- mode, fd, dict);
- trav = trav->next;
- i++;
- if (need_unref && dict)
- dict_unref (dict);
+ STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->create, loc, flags, mode,
+ umask, fd, dict);
+
+ if (need_unref && dict)
+ dict_unref (dict);
- }
return 0;
err:
STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, xdata);
return 0;
}
int32_t
stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2184,224 +2455,25 @@ stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
- if (local->op_ret == -1) {
- if (local->fctx) {
- if (!local->fctx->static_array)
- GF_FREE (local->fctx->xl_array);
- GF_FREE (local->fctx);
- }
- } else {
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)local->fctx);
- }
-
STRIPE_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, xdata);
}
out:
return 0;
}
-int32_t
-stripe_open_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- char key[256] = {0,};
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = (call_frame_t *)cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- if (local->op_errno != EIO)
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- goto unlock;
- }
-
- if (!dict)
- goto unlock;
-
- if (!local->fctx) {
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf (key, "trusted.%s.stripe-size", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size =
- data_to_int64 (data);
- }
-
- if (local->fctx->stripe_size != data_to_int64 (data)) {
- gf_log (this->name, GF_LOG_WARNING,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
- /* Stripe count */
- sprintf (key, "trusted.%s.stripe-count", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32 (data);
- if (!local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
-
- local->fctx->xl_array =
- GF_CALLOC (local->fctx->stripe_count,
- sizeof (xlator_t *),
- gf_stripe_mt_xlator_t);
- if (!local->fctx->xl_array) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
- }
- if (local->fctx->stripe_count != data_to_int32 (data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr (%d != %d)",
- local->fctx->stripe_count, data_to_int32 (data));
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
-
- /* index */
- sprintf (key, "trusted.%s.stripe-index", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- index = data_to_int32 (data);
- if (index > priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-index xattr (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- if (local->fctx->xl_array) {
- if (local->fctx->xl_array[index]) {
- gf_log (this->name, GF_LOG_ERROR,
- "duplicate entry @ index (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- local->fctx->xl_array[index] = prev->this;
- }
- local->entry_count++;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* TODO: if self-heal flag is set, do it */
- if (local->xattr_self_heal_needed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: stripe info need to be healed",
- local->loc.path);
- }
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret)
- goto err;
-
- if (local->entry_count != local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "entry-count (%d) != stripe-count (%d)",
- local->entry_count, local->fctx->stripe_count);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
- if (!local->fctx->stripe_size) {
- gf_log (this->name, GF_LOG_ERROR, "stripe size not set");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
-
- local->call_count = local->fctx->stripe_count;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open, &local->loc,
- local->flags, local->fd, 0);
- trav = trav->next;
- }
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
-out:
- return 0;
-}
/**
* stripe_open -
*/
int32_t
stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
int32_t op_errno = 1;
- dict_t *dict = NULL;
- int ret = 0;
- char key[256] = {0,};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2418,8 +2490,7 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2435,73 +2506,25 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
/* Striped files */
local->flags = flags;
local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
-
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict)
- goto err;
-
- sprintf (key, "trusted.%s.stripe-size", this->name);
- ret = dict_set_int64 (dict, key, 8);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- sprintf (key, "trusted.%s.stripe-count", this->name);
- ret = dict_set_int32 (dict, key, 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- sprintf (key, "trusted.%s.stripe-index", this->name);
- ret = dict_set_int32 (dict, key, 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- while (trav) {
- STACK_WIND (frame, stripe_open_lookup_cbk,
- trav->xlator, trav->xlator->fops->lookup,
- loc, dict);
- trav = trav->next;
- }
- if (dict)
- dict_unref (dict);
-
- return 0;
- }
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fctx->static_array = 1;
- local->fctx->stripe_size = local->stripe_size;
- local->fctx->stripe_count = priv->child_count;
- local->fctx->xl_array = priv->xl_array;
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
while (trav) {
STACK_WIND (frame, stripe_open_cbk, trav->xlator,
trav->xlator->fops->open,
&local->loc, local->flags, local->fd,
- wbflags);
+ xdata);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2534,7 +2557,7 @@ stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
STRIPE_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
}
out:
return 0;
@@ -2542,7 +2565,7 @@ out:
int32_t
-stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -2564,8 +2587,7 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2576,19 +2598,19 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
while (trav) {
STACK_WIND (frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd);
+ trav->xlator->fops->opendir, loc, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2628,7 +2650,7 @@ stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
STRIPE_STACK_UNWIND (lk, frame, local->op_ret,
- local->op_errno, &local->lock);
+ local->op_errno, &local->lock, NULL);
}
out:
return 0;
@@ -2636,7 +2658,7 @@ out:
int32_t
stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -2652,8 +2674,7 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
priv = this->private;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2664,20 +2685,20 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
while (trav) {
STACK_WIND (frame, stripe_lk_cbk, trav->xlator,
- trav->xlator->fops->lk, fd, cmd, lock);
+ trav->xlator->fops->lk, fd, cmd, lock, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2714,14 +2735,14 @@ stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = -1;
STRIPE_STACK_UNWIND (flush, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
}
out:
return 0;
}
int32_t
-stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2741,8 +2762,7 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto err;
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2753,13 +2773,13 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
while (trav) {
STACK_WIND (frame, stripe_flush_cbk, trav->xlator,
- trav->xlator->fops->flush, fd);
+ trav->xlator->fops->flush, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (flush, frame, -1, op_errno);
+ STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -2768,7 +2788,7 @@ err:
int32_t
stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2804,6 +2824,9 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -2826,18 +2849,19 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (fsync, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2849,31 +2873,38 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->fctx = fctx;
+
local->op_ret = -1;
frame->local = local;
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_fsync_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags);
+ trav->xlator->fops->fsync, fd, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2908,6 +2939,9 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf = *buf;
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -2924,7 +2958,7 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STRIPE_STACK_UNWIND (fstat, frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ local->op_errno, &local->stbuf, NULL);
}
out:
@@ -2934,11 +2968,12 @@ out:
int32_t
stripe_fstat (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2950,8 +2985,7 @@ stripe_fstat (call_frame_t *frame,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2960,26 +2994,35 @@ stripe_fstat (call_frame_t *frame,
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(fd->inode->ia_type)) {
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd);
+ trav->xlator->fops->fstat, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
-stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
+ int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2987,11 +3030,9 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
VALIDATE_OR_GOTO (fd->inode, err);
priv = this->private;
- trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3000,22 +3041,60 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->ftruncate, fd, offset);
- trav = trav->next;
- }
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (!fctx->stripe_count) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe count");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR, "no xlator at index "
+ "%d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->ftruncate, fd, dest_offset,
+ NULL);
+ }
return 0;
err:
- STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3052,14 +3131,14 @@ stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = -1;
STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
}
out:
return 0;
}
int32_t
-stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -3075,8 +3154,7 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3087,20 +3165,20 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
while (trav) {
STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags);
+ trav->xlator->fops->fsyncdir, fd, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno);
+ STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
int32_t
stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t i = 0;
int32_t callcnt = 0;
@@ -3110,6 +3188,7 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt tmp_stbuf = {0,};
struct iobref *tmp_iobref = NULL;
struct iobuf *iobuf = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3117,13 +3196,16 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local = frame->local;
+ prev = cookie;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
- if (op_ret != -1)
+ if (op_ret != -1) {
+ correct_file_size(buf, local->fctx, prev);
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
+ }
}
UNLOCK (&frame->lock);
@@ -3152,7 +3234,8 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
vec[count].iov_len =
(local->replies[i].requested_size -
local->replies[i].op_ret);
- iobuf = iobuf_get (this->ctx->iobuf_pool);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ vec[count].iov_len);
if (!iobuf) {
gf_log (this->name, GF_LOG_ERROR,
"Out of memory.");
@@ -3161,9 +3244,11 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto done;
}
memset (iobuf->ptr, 0, vec[count].iov_len);
- iobref_add (local->iobref, iobuf);
vec[count].iov_base = iobuf->ptr;
+ iobref_add (local->iobref, iobuf);
+ iobuf_unref(iobuf);
+
op_ret += vec[count].iov_len;
count++;
}
@@ -3181,11 +3266,10 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
GF_FREE (local->replies);
tmp_iobref = local->iobref;
STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec,
- count, &tmp_stbuf, tmp_iobref);
+ count, &tmp_stbuf, tmp_iobref, NULL);
iobref_unref (tmp_iobref);
- if (vec)
- GF_FREE (vec);
+ GF_FREE (vec);
}
out:
return 0;
@@ -3198,7 +3282,7 @@ out:
int32_t
stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
int32_t index = 0;
int32_t callcnt = 0;
@@ -3209,8 +3293,10 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_local_t *local = NULL;
struct iovec *final_vec = NULL;
struct iatt tmp_stbuf = {0,};
+ struct iatt *tmp_stbuf_p = NULL; //need it for a warning
struct iobref *tmp_iobref = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3219,6 +3305,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
index = local->node_index;
+ prev = cookie;
mframe = local->orig_frame;
if (!mframe)
goto out;
@@ -3239,6 +3326,12 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
mlocal->replies[index].count = count;
mlocal->replies[index].vector = iov_dup (vector, count);
+ correct_file_size(stbuf, fctx, prev);
+
+ if (local->stbuf_size < stbuf->ia_size)
+ local->stbuf_size = stbuf->ia_size;
+ local->stbuf_blocks += stbuf->ia_blocks;
+
if (!mlocal->iobref)
mlocal->iobref = iobref_new ();
iobref_merge (mlocal->iobref, iobref);
@@ -3295,17 +3388,21 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* cause any bugs at higher levels */
memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf,
sizeof (struct iatt));
+ tmp_stbuf.ia_size = local->stbuf_size;
+ tmp_stbuf.ia_blocks = local->stbuf_blocks;
done:
/* */
GF_FREE (mlocal->replies);
tmp_iobref = mlocal->iobref;
+ /* work around for nfs truncated read. Bug 3774 */
+ tmp_stbuf_p = &tmp_stbuf;
+ WIPE (tmp_stbuf_p);
STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec,
- final_count, &tmp_stbuf, tmp_iobref);
+ final_count, &tmp_stbuf, tmp_iobref, NULL);
iobref_unref (tmp_iobref);
- if (final_vec)
- GF_FREE (final_vec);
+ GF_FREE (final_vec);
}
goto out;
@@ -3317,7 +3414,7 @@ check_size:
STACK_WIND (mframe, stripe_readv_fstat_cbk,
(fctx->xl_array[index]),
(fctx->xl_array[index])->fops->fstat,
- mlocal->fd);
+ mlocal->fd, NULL);
}
out:
@@ -3329,7 +3426,7 @@ end:
int32_t
stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
int32_t op_errno = EINVAL;
int32_t idx = 0;
@@ -3342,6 +3439,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
uint64_t stripe_size = 0;
off_t rounded_start = 0;
off_t frame_offset = offset;
+ off_t dest_offset = 0;
stripe_local_t *local = NULL;
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
@@ -3352,7 +3450,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EBADFD;
goto err;
@@ -3360,6 +3458,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
if (!stripe_size) {
gf_log (this->name, GF_LOG_DEBUG,
"Wrong stripe size for the file");
@@ -3374,8 +3474,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rounded_end = roof (offset+size, stripe_size);
num_stripe = (rounded_end- rounded_start)/stripe_size;
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3383,8 +3482,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
frame->local = local;
/* This is where all the vectors should be copied. */
- local->replies = GF_CALLOC (num_stripe, sizeof (struct readv_replies),
- gf_stripe_mt_readv_replies);
+ local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
if (!local->replies) {
op_errno = ENOMEM;
goto err;
@@ -3399,8 +3498,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
for (index = off_index; index < (num_stripe + off_index); index++) {
rframe = copy_frame (frame);
- rlocal = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
op_errno = ENOMEM;
goto err;
@@ -3414,9 +3512,16 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rlocal->readv_size = frame_size;
rframe->local = rlocal;
idx = (index % fctx->stripe_count);
+
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(frame_offset,
+ stripe_size, fctx->stripe_count);
+ else
+ dest_offset = frame_offset;
+
STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->readv,
- fd, frame_size, frame_offset);
+ fd, frame_size, dest_offset, flags, xdata);
frame_offset += frame_size;
}
@@ -3426,7 +3531,7 @@ err:
if (rframe)
STRIPE_STACK_DESTROY (rframe);
- STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -3434,11 +3539,15 @@ err:
int32_t
stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+ struct stripe_replies *reply = NULL;
+ int32_t i = 0;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3447,39 +3556,82 @@ stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
LOCK(&frame->lock);
{
- callcnt = ++local->call_count;
+ callcnt = ++mlocal->call_count;
+
+ mlocal->replies[local->node_index].op_ret = op_ret;
+ mlocal->replies[local->node_index].op_errno = op_errno;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- local->op_ret = -1;
- }
if (op_ret >= 0) {
- local->op_ret += op_ret;
- local->post_buf = *postbuf;
- local->pre_buf = *prebuf;
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
}
}
UNLOCK (&frame->lock);
- if ((callcnt == local->wind_count) && local->unwind) {
- STRIPE_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, &local->pre_buf,
- &local->post_buf);
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ /*
+ * Only return the number of consecutively written bytes up until
+ * the first error. Only return an error if it occurs first.
+ *
+ * When a short write occurs, the application should retry at the
+ * appropriate offset, at which point we'll potentially pass back
+ * the error.
+ */
+ for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
+ i++, reply++) {
+ if (reply->op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "reply %d "
+ "returned error %s", i,
+ strerror(reply->op_errno));
+ if (!mlocal->op_ret) {
+ mlocal->op_ret = -1;
+ mlocal->op_errno = reply->op_errno;
+ }
+ break;
+ }
+
+ mlocal->op_ret += reply->op_ret;
+
+ if (reply->op_ret < reply->requested_size)
+ break;
+ }
+
+ GF_FREE(mlocal->replies);
+
+ STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
}
out:
+ STRIPE_STACK_DESTROY(frame);
return 0;
}
int32_t
stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
struct iovec *tmp_vec = NULL;
stripe_local_t *local = NULL;
@@ -3493,13 +3645,19 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t fill_size = 0;
uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ off_t rounded_start = 0;
+ off_t rounded_end = 0;
+ int32_t total_chunks = 0;
+ call_frame_t *wframe = NULL;
+ stripe_local_t *wlocal = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EINVAL;
goto err;
@@ -3507,22 +3665,51 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
/* File has to be striped across the child nodes */
for (idx = 0; idx< count; idx ++) {
total_size += vector[idx].iov_len;
}
remaining_size = total_size;
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
local->stripe_size = stripe_size;
+ local->fctx = fctx;
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ rounded_start = floor(offset, stripe_size);
+ rounded_end = roof(offset + total_size, stripe_size);
+ total_chunks = (rounded_end - rounded_start) / stripe_size;
+ local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
+ if (!local->replies) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ total_chunks = 0;
while (1) {
+ wframe = copy_frame(frame);
+ wlocal = mem_get0(this->local_pool);
+ if (!wlocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ wlocal->orig_frame = frame;
+ wframe->local = wlocal;
+
/* Send striped chunk of the vector to child
nodes appropriately. */
idx = (((offset + offset_offset) /
@@ -3550,47 +3737,589 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (remaining_size == 0)
local->unwind = 1;
- STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx],
+ /*
+ * Store off the request index (with respect to the chunk of the
+ * initial offset) and the size of the request. This is required
+ * in the callback to calculate an appropriate return value in
+ * the event of a write failure in one or more requests.
+ */
+ wlocal->node_index = total_chunks;
+ local->replies[total_chunks].requested_size = fill_size;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
- tmp_count, offset + offset_offset, iobref);
+ tmp_count, dest_offset, flags, iobref,
+ xdata);
+
GF_FREE (tmp_vec);
offset_offset += fill_size;
+ total_chunks++;
if (remaining_size == 0)
break;
}
return 0;
err:
- STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ if (wframe)
+ STRIPE_STACK_DESTROY(wframe);
+
+ STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_release (xlator_t *this, fd_t *fd)
+stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+/*
+ * stripe_fallocate: fan an fallocate request out to the striped children.
+ *
+ * The range [offset, offset + len) is cut at stripe_size boundaries. Each
+ * piece is wound on its own copy of the frame to the child selected by
+ * (file offset / stripe_size) % stripe_count, with stripe_fallocate_cbk as
+ * the callback. When fctx->stripe_coalesce is set, the file offset is
+ * translated with coalesced_offset() before winding.
+ *
+ * Failures taken through the err label unwind the caller's frame with -1
+ * and op_errno. Always returns 0.
+ */
+int32_t
+stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ off_t offset_offset = 0; /* bytes already wound; off_t so ranges >= 2GiB fit */
+ size_t remaining_size = 0; /* bytes still to wind; same width as len */
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ if (!fframe) {
+ /* do not dereference a failed frame copy below */
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send fallocate request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ /* bytes left in the current stripe chunk, clamped to the request */
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if ((uint64_t) fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single fallocate per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->fallocate, fd, mode,
+ dest_offset, fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+/*
+ * stripe_discard: fan a discard request out to the striped children.
+ *
+ * The range [offset, offset + len) is cut at stripe_size boundaries. Each
+ * piece is wound on its own copy of the frame to the child selected by
+ * (file offset / stripe_size) % stripe_count, with stripe_discard_cbk as
+ * the callback. When fctx->stripe_coalesce is set, the file offset is
+ * translated with coalesced_offset() before winding.
+ *
+ * Failures taken through the err label unwind the caller's frame with -1
+ * and op_errno. Always returns 0.
+ */
+int32_t
+stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ off_t offset_offset = 0; /* bytes already wound; off_t so ranges >= 2GiB fit */
+ size_t remaining_size = 0; /* bytes still to wind; same width as len */
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+ VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_del (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
goto err;
}
+ while (1) {
+ fframe = copy_frame(frame);
+ if (!fframe) {
+ /* do not dereference a failed frame copy below */
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send discard request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ /* bytes left in the current stripe chunk, clamped to the request */
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if ((uint64_t) fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single discard per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->discard, fd, dest_offset,
+ fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+/*
+ * stripe_zerofill: fan a zerofill request out to the striped children.
+ *
+ * The range [offset, offset + len) is cut at stripe_size boundaries. Each
+ * piece is wound on its own copy of the frame to the child selected by
+ * (file offset / stripe_size) % stripe_count, with stripe_zerofill_cbk as
+ * the callback. When fctx->stripe_coalesce is set, the file offset is
+ * translated with coalesced_offset() before winding.
+ *
+ * Failures taken through the err label unwind the caller's frame with -1
+ * and op_errno. Always returns 0.
+ */
+int32_t
+stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ off_t offset_offset = 0; /* bytes already wound; off_t so ranges >= 2GiB fit */
+ size_t remaining_size = 0; /* bytes still to wind; same width as len */
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
- if (!fctx->static_array)
- GF_FREE (fctx->xl_array);
+ STRIPE_VALIDATE_FCTX (fctx, err);
- GF_FREE (fctx);
+ remaining_size = len;
+
+ /* allocate the parent local exactly once; it is owned by frame->local */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+ while (1) {
+ fframe = copy_frame(frame);
+ if (!fframe) {
+ /* do not dereference a failed frame copy below */
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send zerofill request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ /* bytes left in the current stripe chunk, clamped to the request */
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if ((uint64_t) fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size,
+ fctx->stripe_count);
+
+ STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->zerofill, fd,
+ dest_offset, fill_size, xdata);
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_release (xlator_t *this, fd_t *fd)
+{
return 0;
}
+int
+stripe_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp_fctx = 0;
+ stripe_fd_ctx_t *fctx = NULL;
+
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (inode, err);
+
+ (void) inode_ctx_del (inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ goto err;
+ }
+
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+
+ if (!fctx->static_array)
+ GF_FREE (fctx->xl_array);
+
+ GF_FREE (fctx);
+err:
+ return 0;
+}
int32_t
notify (xlator_t *this, int32_t event, void *data, ...)
@@ -3598,6 +4327,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
stripe_private_t *priv = NULL;
int down_client = 0;
int i = 0;
+ gf_boolean_t heard_from_all_children = _gf_false;
if (!this)
return 0;
@@ -3609,30 +4339,34 @@ notify (xlator_t *this, int32_t event, void *data, ...)
switch (event)
{
case GF_EVENT_CHILD_UP:
- case GF_EVENT_CHILD_CONNECTING:
{
/* get an index number to set */
for (i = 0; i < priv->child_count; i++) {
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
if (data == FIRST_CHILD (this))
priv->first_child_down = 0;
- if (!priv->nodes_down)
- default_notify (this, event, data);
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
break;
+ case GF_EVENT_CHILD_CONNECTING:
+ {
+ // 'CONNECTING' doesn't ensure it's CHILD_UP, so do nothing
+ goto out;
+ }
case GF_EVENT_CHILD_DOWN:
{
/* get an index number to set */
@@ -3640,20 +4374,19 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
-
if (data == FIRST_CHILD (this))
priv->first_child_down = 1;
- if (priv->nodes_down)
- default_notify (this, event, data);
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
@@ -3663,79 +4396,252 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
/* */
default_notify (this, event, data);
+ goto out;
}
break;
}
+ // Consider child as down if its last_event is not CHILD_UP
+ for (i = 0, down_client = 0; i < priv->child_count; i++)
+ if (priv->last_event[i] != GF_EVENT_CHILD_UP)
+ down_client++;
+
+ LOCK (&priv->lock);
+ {
+ priv->nodes_down = down_client;
+ }
+ UNLOCK (&priv->lock);
+
+ heard_from_all_children = _gf_true;
+ for (i = 0; i < priv->child_count; i++)
+ if (!priv->last_event[i])
+ heard_from_all_children = _gf_false;
+
+ if (heard_from_all_children)
+ default_notify (this, event, data);
+out:
return 0;
}
int
-set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+stripe_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- if (!this || !priv || !data)
- goto out;
+ int ret = -1;
+ int call_cnt = 0;
+ stripe_local_t *local = NULL;
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r (data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = gf_strdup (stripe_str);
- stripe_opt = CALLOC (1, sizeof (struct stripe_options));
- if (!stripe_opt) {
- GF_FREE (dup_str);
- goto out;
- }
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+ return ret;
+ }
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- num = strtok_r (NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
- }
- if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"", num);
- goto out;
- }
+ local = frame->local;
- if (stripe_opt->block_size < 512) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
- "%s. Should be atleast 512 bytes", num);
- goto out;
+ LOCK (&frame->lock);
+ {
+ call_cnt = --local->wind_count;
+
+ /**
+ * We overwrite ->op_* values here for subsequent failure
+ * conditions, hence we propagate the last errno down the
+ * stack.
+ */
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unlock;
}
- if (stripe_opt->block_size % 512) {
- gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
- " be a multiple of 512 bytes", num);
- goto out;
+ }
+
+ unlock:
+ UNLOCK (&frame->lock);
+
+ if (!call_cnt) {
+ STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_BD_XLATOR
+int
+stripe_is_bd (dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *is_bd = data;
+
+ if (data == NULL)
+ return 0;
+
+ if (XATTR_IS_BD (key))
+ *is_bd = _gf_true;
+
+ return 0;
+}
+
+inline gf_boolean_t
+stripe_setxattr_is_bd (dict_t *dict)
+{
+ gf_boolean_t is_bd = _gf_false;
+
+ if (dict == NULL)
+ goto out;
+
+ dict_foreach (dict, stripe_is_bd, &is_bd);
+out:
+ return is_bd;
+}
+#else
+#define stripe_setxattr_is_bd(dict) _gf_false
+#endif
+
+int
+stripe_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int i = 0;
+ gf_boolean_t is_bd = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+ local->wind_count = priv->child_count;
+ local->op_ret = local->op_errno = 0;
+
+ is_bd = stripe_setxattr_is_bd (dict);
+
+ /**
+ * Set xattrs for directories on all subvolumes. Additionally
+ * this power is only given to a special client. Bd xlator
+ * also needs xattrs for regular files (ie LVs)
+ */
+ if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
+ IA_ISDIR (loc->inode->ia_type)) || is_bd) {
+ for (i = 0; i < priv->child_count; i++, trav = trav->next) {
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ trav->xlator, trav->xlator->fops->setxattr,
+ loc, dict, flags, xdata);
}
+ } else {
+ local->wind_count = 1;
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ }
- memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %"PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
- if (!priv->pattern) {
- priv->pattern = stripe_opt;
- } else {
- temp_stripeopt = priv->pattern;
- while (temp_stripeopt->next)
- temp_stripeopt = temp_stripeopt->next;
- temp_stripeopt->next = stripe_opt;
+int
+stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+stripe_is_special_key (dict_t *this,
+ char *key,
+ data_t *value,
+ void *data)
+{
+ gf_boolean_t *is_special = NULL;
+
+ if (data == NULL) {
+ goto out;
+ }
+
+ is_special = data;
+
+ if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key))
+ *is_special = _gf_true;
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ stripe_local_t *local = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->wind_count;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+ return 0;
+}
+
+int
+stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int ret = -1;
+ stripe_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ goto out;
+ }
+
+ frame->local = local;
+
+ local->wind_count = priv->child_count;
+
+ trav = this->children;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_fsetxattr_everyone_cbk,
+ trav->xlator, trav->xlator->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ trav = trav->next;
}
ret = 0;
@@ -3743,80 +4649,220 @@ out:
return ret;
}
-int32_t
-stripe_iatt_merge (struct iatt *from, struct iatt *to)
+/*
+ * Returns _gf_true when at least one key in @dict is flagged by
+ * stripe_is_special_key(); returns _gf_false for a NULL @dict or when no
+ * key matches.
+ */
+inline gf_boolean_t
+stripe_fsetxattr_is_special (dict_t *dict)
{
- if (to->ia_size < from->ia_size)
- to->ia_size = from->ia_size;
- if (to->ia_mtime < from->ia_mtime)
- to->ia_mtime = from->ia_mtime;
- if (to->ia_ctime < from->ia_ctime)
- to->ia_ctime = from->ia_ctime;
- if (to->ia_atime < from->ia_atime)
- to->ia_atime = from->ia_atime;
- return 0;
+ gf_boolean_t is_spl = _gf_false;
+
+ if (dict == NULL) {
+ goto out;
+ }
+
+ /* the visitor only ever sets the flag, so one match is enough */
+ dict_foreach (dict, stripe_is_special_key, &is_spl);
+
+out:
+ return is_spl;
}
-int32_t
-stripe_readdirp_entry_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+/*
+ * fsetxattr fop entry point.
+ *
+ * Requests whose dict trips GF_IF_INTERNAL_XATTR_GOTO for the pattern
+ * "trusted.*stripe*" are failed. Dicts containing a "special" key (see
+ * stripe_fsetxattr_is_special) are sent to every child; everything else
+ * goes to the first child only.
+ *
+ * Always returns 0; failures are reported by unwinding the frame with
+ * op_ret -1.
+ */
+int
+stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
- stripe_local_t *local = NULL;
- int32_t done = 0;
+ int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
+ gf_boolean_t is_spl = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ is_spl = stripe_fsetxattr_is_special (dict);
+ if (is_spl) {
+ ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict,
+ flags, xdata);
+ if (ret < 0) {
+ /* any failure of the fan-out helper is reported as ENOMEM */
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (!this || !frame || !frame->local || !cookie) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
goto out;
}
- entry = cookie;
+
+ STACK_WIND (frame, stripe_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+out:
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+/*
+ * Completion callback for removexattr: forwards the child's result and
+ * response xdata to the caller as-is.
+ */
+int
+stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * removexattr fop entry point.
+ *
+ * Names caught by GF_IF_NATIVE_XATTR_GOTO for the pattern
+ * "trusted.*stripe*" are refused; every other request is forwarded to the
+ * first child only.
+ *
+ * Always returns 0. On a validation failure the frame is unwound with
+ * op_ret -1 and op_errno (EINVAL unless the xattr check changed it --
+ * NOTE(review): confirm which errno the macro assigns).
+ */
+int
+stripe_removexattr (call_frame_t *frame, xlator_t *this,
+                    loc_t *loc, const char *name, dict_t *xdata)
+{
+        int32_t op_errno = EINVAL;
+
+        VALIDATE_OR_GOTO (this, err);
+
+        GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*", name, op_errno, err);
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        STACK_WIND (frame, stripe_removexattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->removexattr, loc, name, xdata);
+        goto out;
+
+err:
+        STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+out:
+        return 0;
+}
+
+
+/*
+ * Completion callback for fremovexattr: forwards the child's result and
+ * response xdata to the caller as-is.
+ */
+int
+stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * fremovexattr fop entry point (fd-based twin of stripe_removexattr).
+ *
+ * Names caught by GF_IF_NATIVE_XATTR_GOTO for the pattern
+ * "trusted.*stripe*" are refused; every other request is forwarded to the
+ * first child only.
+ *
+ * Always returns 0. On failure the frame is unwound with op_ret -1 and
+ * the request's own xdata is handed back as the response xdata.
+ */
+int
+stripe_fremovexattr (call_frame_t *frame, xlator_t *this,
+                     fd_t *fd, const char *name, dict_t *xdata)
+{
+        int32_t op_errno = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*", name, op_errno, err);
+
+        STACK_WIND (frame, stripe_fremovexattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fremovexattr, fd, name, xdata);
+        goto out;
+
+err:
+        STRIPE_STACK_UNWIND (fremovexattr, frame, -1, op_errno, xdata);
+out:
+        return 0;
+}
+
+/*
+ * Callback for the per-entry lookups issued while servicing readdirp.
+ *
+ * Two counters are involved:
+ *   - local->call_count (on this entry's own frame) counts replies still
+ *     expected from the children for one directory entry;
+ *   - main_local->wind_count (on local->orig_frame, the readdirp frame)
+ *     counts entries that are still being resolved.
+ *
+ * Every successful reply is merged into entry->d_stat and its block count
+ * accumulated. The last reply for an entry stores local->fctx in the
+ * entry's inode ctx, publishes the block total, decrements the parent
+ * counter and destroys the per-entry frame. The entry that brings the
+ * parent counter to zero unwinds the readdirp itself.
+ */
+int32_t
+stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *parent)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ stripe_local_t *main_local = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ int done = 0;
+
local = frame->local;
+ prev = cookie;
+
+ entry = local->dirent;
+
+ main_frame = local->orig_frame;
+ main_local = main_frame->local;
LOCK (&frame->lock);
{
- local->wind_count--;
- if (!local->wind_count)
+ local->call_count--;
+ if (!local->call_count)
done = 1;
if (op_ret == -1) {
local->op_errno = op_errno;
local->op_ret = op_ret;
goto unlock;
}
- stripe_iatt_merge (buf, &entry->d_stat);
+
+ if (stripe_ctx_handle(this, prev, local, xattr))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict.");
+
+ correct_file_size(stbuf, local->fctx, prev);
+
+ stripe_iatt_merge (stbuf, &entry->d_stat);
+ local->stbuf_blocks += stbuf->ia_blocks;
}
unlock:
UNLOCK(&frame->lock);
if (done) {
- frame->local = NULL;
- STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
- local->op_errno, &local->entries);
+ inode_ctx_put (entry->inode, this,
+ (uint64_t) (long)local->fctx);
- gf_dirent_free (&local->entries);
+ /* 'done' is reused: from here on it means "last entry of the readdirp" */
+ done = 0;
+ LOCK (&main_frame->lock);
+ {
+ main_local->wind_count--;
+ if (!main_local->wind_count)
+ done = 1;
+ if (local->op_ret == -1) {
+ main_local->op_errno = local->op_errno;
+ main_local->op_ret = local->op_ret;
+ }
+ entry->d_stat.ia_blocks = local->stbuf_blocks;
+ }
+ UNLOCK (&main_frame->lock);
+ if (done) {
+ /* detach main_local first; it is wiped and released explicitly below */
+ main_frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, main_frame,
+ main_local->op_ret,
+ main_local->op_errno,
+ &main_local->entries, NULL);
+ gf_dirent_free (&main_local->entries);
+ stripe_local_wipe (main_local);
+ mem_put (main_local);
+ }
+ frame->local = NULL;
stripe_local_wipe (local);
- GF_FREE (local);
+ mem_put (local);
+ STRIPE_STACK_DESTROY (frame);
}
-out:
- return 0;
+ return 0;
}
int32_t
stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries)
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *orig_entries, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
gf_dirent_t *local_entry = NULL;
- int32_t ret = -1;
gf_dirent_t *tmp_entry = NULL;
xlator_list_t *trav = NULL;
loc_t loc = {0, };
- inode_t *inode = NULL;
- char *path;
int32_t count = 0;
stripe_private_t *priv = NULL;
int32_t subvols = 0;
+ dict_t *xattrs = NULL;
+ call_frame_t *local_frame = NULL;
+ stripe_local_t *local_ent = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3842,8 +4888,9 @@ stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = op_ret;
list_splice_init (&orig_entries->list,
&local->entries.list);
- local->wind_count = op_ret * subvols;
+ local->wind_count = op_ret;
}
+
}
unlock:
UNLOCK (&frame->lock);
@@ -3851,8 +4898,10 @@ unlock:
if (op_ret == -1)
goto out;
+ xattrs = dict_new ();
+ if (xattrs)
+ (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);
count = op_ret;
- ret = 0;
list_for_each_entry_safe (local_entry, tmp_entry,
(&local->entries.list), list) {
@@ -3861,67 +4910,73 @@ unlock:
if (!IA_ISREG (local_entry->d_stat.ia_type)) {
LOCK (&frame->lock);
{
- local->wind_count -= subvols;
+ local->wind_count--;
count = local->wind_count;
}
UNLOCK (&frame->lock);
continue;
}
- inode = inode_new (local->fd->inode->table);
- if (!inode)
+ local_frame = copy_frame (frame);
+
+ if (!local_frame) {
+ op_errno = ENOMEM;
+ op_ret = -1;
goto out;
+ }
- loc.ino = inode->ino = local_entry->d_ino;
- loc.inode = inode;
- loc.parent = local->fd->inode;
- ret = inode_path (local->fd->inode, local_entry->d_name, &path);
- if (ret != -1) {
- loc.path = path;
- } else if (inode) {
- ret = inode_path (inode, NULL, &path);
- if (ret != -1) {
- loc.path = path;
- } else {
- goto out;
- }
+ local_ent = mem_get0 (this->local_pool);
+ if (!local_ent) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto out;
}
- loc.name = strrchr (loc.path, '/');
- loc.name++;
+ loc.inode = inode_ref (local_entry->inode);
+
+ uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid);
+
+ local_ent->orig_frame = frame;
+
+ local_ent->call_count = subvols;
+
+ local_ent->dirent = local_entry;
+
+ local_frame->local = local_ent;
+
trav = this->children;
while (trav) {
- STACK_WIND_COOKIE (frame, stripe_readdirp_entry_stat_cbk,
- local_entry, trav->xlator,
- trav->xlator->fops->stat, &loc);
+ STACK_WIND (local_frame, stripe_readdirp_lookup_cbk,
+ trav->xlator, trav->xlator->fops->lookup,
+ &loc, xattrs);
trav = trav->next;
}
- inode_unref (loc.inode);
+ loc_wipe (&loc);
}
out:
if (!count) {
/* all entries are directories */
frame->local = NULL;
STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
- local->op_errno, &local->entries);
+ local->op_errno, &local->entries, NULL);
gf_dirent_free (&local->entries);
stripe_local_wipe (local);
- GF_FREE (local);
+ mem_put (local);
}
-
+ if (xattrs)
+ dict_unref (xattrs);
return 0;
}
int32_t
stripe_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
int op_errno = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -3935,8 +4990,7 @@ stripe_readdirp (call_frame_t *frame, xlator_t *this,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3956,15 +5010,16 @@ stripe_readdirp (call_frame_t *frame, xlator_t *this,
goto err;
STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator,
- trav->xlator->fops->readdirp, fd, size, off);
+ trav->xlator->fops->readdirp, fd, size, off, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -3984,65 +5039,87 @@ mem_acct_init (xlator_t *this)
out:
return ret;
}
-int
-validate_options (xlator_t *this, char **op_errstr)
-{
- int ret = 0;
- volume_opt_list_t *vol_opt = NULL;
- volume_opt_list_t *tmp;
- if (!this) {
- gf_log (this->name, GF_LOG_DEBUG, "'this' not a valid ptr");
- ret =-1;
- goto out;
- }
+/*
+ * Frees every node of the priv->pattern list and leaves priv->pattern
+ * NULL. Returns 0 on success, -1 if @priv is NULL. No locking is done
+ * here.
+ */
+static int
+clear_pattern_list (stripe_private_t *priv)
+{
+ struct stripe_options *prev = NULL;
+ struct stripe_options *trav = NULL;
+ int ret = -1;
- if (list_empty (&this->volume_options))
- goto out;
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
- vol_opt = list_entry (this->volume_options.next,
- volume_opt_list_t, list);
- list_for_each_entry_safe (vol_opt, tmp, &this->volume_options, list) {
- ret = validate_xlator_volume_options_attacherr (this,
- vol_opt->given_opt,
- op_errstr);
+ /* detach the list head before freeing the nodes */
+ trav = priv->pattern;
+ priv->pattern = NULL;
+ while (trav) {
+ prev = trav;
+ trav = trav->next;
+ GF_FREE (prev);
}
-out:
-
+ ret = 0;
+ out:
return ret;
+
+
}
+
+/*
+ * Applies changed volume options to a running stripe translator.
+ *
+ * Under priv->lock the existing pattern list is discarded, then the block
+ * size is taken from the "block-size" entry of @options when present, or
+ * from that option's declared default value otherwise; finally the
+ * "coalesce" option is re-read.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
int
reconfigure (xlator_t *this, dict_t *options)
{
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- int ret = 0;
+ stripe_private_t *priv = NULL;
+ data_t *data = NULL;
+ int ret = -1;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ GF_ASSERT (this->private);
- priv = this->private;
+ priv = this->private;
- data = dict_get (options, "block-size");
- if (data) {
- gf_log (this->name, GF_LOG_TRACE,"Reconfiguring Stripe"
- " Block-size");
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Reconfigue: Block-Size reconfiguration failed");
- ret = -1;
- goto out;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "Reconfigue: Block-Size reconfigured Successfully");
- }
- else {
- priv->block_size = (128 * GF_UNIT_KB);
+
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ /* the old patterns are dropped before the new setting is parsed */
+ ret = clear_pattern_list (priv);
+ if (ret)
+ goto unlock;
+
+ data = dict_get (options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
+ } else {
+ /* option not given: fall back to its declared default value */
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ }
+
+ GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool,
+ unlock);
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
-out:
- return ret;
+ ret = 0;
+ out:
+ return ret;
}
@@ -4055,9 +5132,9 @@ int32_t
init (xlator_t *this)
{
stripe_private_t *priv = NULL;
+ volume_option_t *opt = NULL;
xlator_list_t *trav = NULL;
data_t *data = NULL;
- char *def_blk_size = NULL;
int32_t count = 0;
int ret = -1;
@@ -4099,9 +5176,9 @@ init (xlator_t *this)
if (!priv->xl_array)
goto out;
- priv->state = GF_CALLOC (count, sizeof (int8_t),
- gf_stripe_mt_int8_t);
- if (!priv->state)
+ priv->last_event = GF_CALLOC (count, sizeof (int),
+ gf_stripe_mt_int32_t);
+ if (!priv->last_event)
goto out;
priv->child_count = count;
@@ -4121,77 +5198,56 @@ init (xlator_t *this)
goto out;
}
- if (xlator_get_volopt_info (&this->volume_options, "block-size",
- &def_blk_size, NULL)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of stripe "
- "block-size corrupt");
- ret = -1;
- goto out;
- } else {
- if (gf_string2bytesize (def_blk_size, &priv->block_size)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- "stripe block-size corrupt");
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
ret = -1;
- goto out;
+ goto unlock;
+ }
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ /* option stripe-pattern *avi:1GB,*pdf:16K */
+ data = dict_get (this->options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
}
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
+ GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out);
+ /* notify related */
+ priv->nodes_down = priv->child_count;
- /* option stripe-pattern *avi:1GB,*pdf:4096 */
- data = dict_get (this->options, "block-size");
- if (!data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No \"option block-size <x>\" given, defaulting "
- "to %s", def_blk_size);
- } else {
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret)
- goto out;
- }
+ GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
- if (xlator_get_volopt_info (&this->volume_options, "use-xattr",
- &def_blk_size, NULL)) {
+ this->local_pool = mem_pool_new (stripe_local_t, 128);
+ if (!this->local_pool) {
ret = -1;
gf_log (this->name, GF_LOG_ERROR,
- "error setting(default) hard check for extended"
- " attribute");
+ "failed to create local_t's memory pool");
goto out;
-
- }
- else {
- if (gf_string2boolean (def_blk_size,
- &priv->xattr_supported)) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "error setting(default) hard check for extended"
- " attribute");
- goto out;
- }
}
-
- data = dict_get (this->options, "use-xattr");
- if (data) {
- if (gf_string2boolean (data->data,
- &priv->xattr_supported) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "error setting hard check for extended "
- "attribute");
- //return -1;
- }
- }
-
- /* notify related */
- priv->nodes_down = priv->child_count;
this->private = priv;
-
ret = 0;
out:
if (ret) {
if (priv) {
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
+ GF_FREE (priv->xl_array);
GF_FREE (priv);
}
}
@@ -4215,15 +5271,15 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
this->private = NULL;
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
+ GF_FREE (priv->xl_array);
trav = priv->pattern;
while (trav) {
prev = trav;
trav = trav->next;
- FREE (prev);
+ GF_FREE (prev);
}
+ GF_FREE (priv->last_event);
LOCK_DESTROY (&priv->lock);
GF_FREE (priv);
}
@@ -4234,17 +5290,50 @@ out:
+/*
+ * Unwinds a getxattr frame with the given result, dict and xdata.
+ * It is handed to cluster_getmarkerattr() as the unwind routine for
+ * marker xattr requests. Always returns 0.
+ */
int32_t
stripe_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
+/*
+ * getxattr/fgetxattr callback that hides this translator's bookkeeping
+ * xattrs from the caller. On a successful reply the keys
+ *     trusted.<this->name>.stripe-size
+ *     trusted.<this->name>.stripe-count
+ *     trusted.<this->name>.stripe-index
+ *     trusted.<this->name>.stripe-coalesce
+ * are deleted from @xattr before the reply is unwound.
+ *
+ * Failed replies, replies without a dict, and frames without a local are
+ * unwound untouched.
+ * NOTE(review): stripe_fgetxattr() winds with this callback without
+ * setting frame->local, so on that path the keys are not stripped --
+ * confirm whether that is intended.
+ *
+ * The key names are built with snprintf() so that an unusually long
+ * translator name cannot overrun the fixed-size key buffers.
+ */
+int
+stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                              int op_ret, int op_errno, dict_t *xattr,
+                              dict_t *xdata)
+{
+        char size_key[256]     = {0,};
+        char index_key[256]    = {0,};
+        char count_key[256]    = {0,};
+        char coalesce_key[256] = {0,};
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (frame->local, out);
+
+        if (!xattr || (op_ret == -1))
+                goto out;
+
+        snprintf (size_key, sizeof (size_key),
+                  "trusted.%s.stripe-size", this->name);
+        snprintf (count_key, sizeof (count_key),
+                  "trusted.%s.stripe-count", this->name);
+        snprintf (index_key, sizeof (index_key),
+                  "trusted.%s.stripe-index", this->name);
+        snprintf (coalesce_key, sizeof (coalesce_key),
+                  "trusted.%s.stripe-coalesce", this->name);
+
+        dict_del (xattr, size_key);
+        dict_del (xattr, count_key);
+        dict_del (xattr, index_key);
+        dict_del (xattr, coalesce_key);
+
+out:
+        STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+        return 0;
+}
int
stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
int call_cnt = 0;
stripe_local_t *local = NULL;
@@ -4274,92 +5363,39 @@ stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
if (!call_cnt) {
STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
- local->xattr);
+ local->xattr, xdata);
}
return 0;
}
int32_t
-stripe_pathinfo_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
-{
- int32_t i = 0;
- int32_t ret = -1;
- int32_t len = 0;
- char *sbuf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!buffer || !local || !local->xattr_list)
- goto out;
-
- sbuf = buffer;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->pathinfo_len;
-
- if (len && xattr && xattr->pathinfo) {
- memcpy (buffer, xattr->pathinfo, len);
- buffer += len;
- *buffer++ = ' ';
- }
- }
-
- *--buffer = '\0';
- if (total)
- *total = buffer - sbuf;
- ret = 0;
-
- out:
- return ret;
-}
-
-int32_t
-stripe_free_pathinfo_str (stripe_local_t *local)
+stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- int32_t i = 0;
- int32_t ret = -1;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!local || !local->xattr_list)
- goto out;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
-
- if (xattr && xattr->pathinfo)
- GF_FREE (xattr->pathinfo);
- }
-
- ret = 0;
- out:
- return ret;
-}
-
-int32_t
-stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict) {
stripe_local_t *local = NULL;
int32_t callcnt = 0;
int32_t ret = -1;
long cky = 0;
- char *pathinfo = NULL;
- char *pathinfo_serz = NULL;
- int32_t padding = 0;
- int32_t tlen = 0;
- char stripe_size_str[20] = {0,};
+ void *xattr_val = NULL;
+ void *xattr_serz = NULL;
stripe_xattr_sort_t *xattr = NULL;
dict_t *stripe_xattr = NULL;
if (!frame || !frame->local || !this) {
- gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
return ret;
}
local = frame->local;
cky = (long) cookie;
+ if (local->xsel[0] == '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk");
+ return ret;
+ }
+
LOCK (&frame->lock);
{
callcnt = --local->wind_count;
@@ -4368,23 +5404,26 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
goto out;
if (!local->xattr_list)
- local->xattr_list = (stripe_xattr_sort_t *) GF_CALLOC (local->nallocs,
- sizeof (stripe_xattr_sort_t),
- gf_stripe_mt_xattr_sort_t);
+ local->xattr_list = (stripe_xattr_sort_t *)
+ GF_CALLOC (local->nallocs,
+ sizeof (stripe_xattr_sort_t),
+ gf_stripe_mt_xattr_sort_t);
if (local->xattr_list) {
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
- if (ret)
- goto out;
-
xattr = local->xattr_list + (int32_t) cky;
- pathinfo = gf_strdup (pathinfo);
+ ret = dict_get_ptr_and_len (dict, local->xsel,
+ &xattr_val,
+ &xattr->xattr_len);
+ if (xattr->xattr_len == 0)
+ goto out;
+
xattr->pos = cky;
- xattr->pathinfo = pathinfo;
- xattr->pathinfo_len = strlen (pathinfo);
+ xattr->xattr_value = gf_memdup (xattr_val,
+ xattr->xattr_len);
- local->xattr_total_len += strlen (pathinfo) + 1;
+ if (xattr->xattr_value != NULL)
+ local->xattr_total_len += xattr->xattr_len + 1;
}
}
out:
@@ -4398,41 +5437,36 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
if (!stripe_xattr)
goto unwind;
- snprintf (stripe_size_str, 20, "%ld", local->stripe_size);
-
- /* extra bytes for decorations (brackets and <>'s) */
- padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
- + strlen (stripe_size_str) + 7;
- local->xattr_total_len += (padding + 2);
-
- pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
- gf_common_mt_char);
- if (!pathinfo_serz)
- goto unwind;
-
- /* xlator info */
- sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ", this->name, stripe_size_str);
-
- ret = stripe_pathinfo_aggregate (pathinfo_serz + padding, local, &tlen);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot aggregate pathinfo list");
+ /* select filler based on ->xsel */
+ if (XATTR_IS_PATHINFO (local->xsel))
+ ret = stripe_fill_pathinfo_xattr (this, local,
+ (char **)&xattr_serz);
+ else if (XATTR_IS_LOCKINFO (local->xsel)) {
+ ret = stripe_fill_lockinfo_xattr (this, local,
+ &xattr_serz);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown xattr in xattr request");
goto unwind;
}
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
-
- ret = dict_set_dynstr (stripe_xattr, GF_XATTR_PATHINFO_KEY, pathinfo_serz);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo key in dict");
+ if (!ret) {
+ ret = dict_set_dynptr (stripe_xattr, local->xsel,
+ xattr_serz,
+ local->xattr_total_len);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Can't set %s key in dict",
+ local->xsel);
+ }
unwind:
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, stripe_xattr);
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
+ stripe_xattr, NULL);
- ret = stripe_free_pathinfo_str (local);
+ ret = stripe_free_xattr_str (local);
- if (local->xattr_list)
- GF_FREE (local->xattr_list);
+ GF_FREE (local->xattr_list);
if (stripe_xattr)
dict_unref (stripe_xattr);
@@ -4443,14 +5477,15 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
int32_t
stripe_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int i = 0;
- xlator_t **sub_volumes;
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t op_errno = EINVAL;
+ int i = 0;
+ xlator_t **sub_volumes;
+ int ret = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -4462,8 +5497,7 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -4474,7 +5508,7 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (name && (strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
sub_volumes = alloca ( priv->child_count *
@@ -4489,7 +5523,8 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (cluster_getmarkerattr (frame, this, loc, name,
local, stripe_getxattr_unwind,
sub_volumes, priv->child_count,
- MARKER_UUID_TYPE, priv->vol_uuid)) {
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ priv->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
@@ -4505,25 +5540,39 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
trav = trav->next) {
STACK_WIND (frame, stripe_getxattr_cbk,
trav->xlator, trav->xlator->fops->getxattr,
- loc, name);
+ loc, name, xdata);
}
return 0;
}
- if (name && (strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0)) {
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ if (name &&
+ ((strncmp (name, GF_XATTR_PATHINFO_KEY,
+ strlen (GF_XATTR_PATHINFO_KEY)) == 0))) {
+ if (IA_ISREG (loc->inode->ia_type)) {
+ ret = inode_ctx_get (loc->inode, this,
+ (uint64_t *) &local->fctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "stripe size unavailable from fctx"
+ " relying on pathinfo could lead to"
+ " wrong results");
+ }
+
local->nallocs = local->wind_count = priv->child_count;
+ (void) strncpy (local->xsel, name, strlen (name));
+ /**
+ * for xattrs that need info from all childs, fill ->xsel
+ * as above and call the filler function in cbk based on
+ * it
+ */
for (i = 0, trav = this->children; i < priv->child_count; i++,
trav = trav->next) {
- STACK_WIND_COOKIE (frame, stripe_getxattr_pathinfo_cbk,
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
(void *) (long) i, trav->xlator,
trav->xlator->fops->getxattr,
- loc, name);
+ loc, name, xdata);
}
return 0;
@@ -4531,46 +5580,128 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (name &&(*priv->vol_uuid)) {
if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
- local->marker.call_count = priv->child_count;
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
- sub_volumes = alloca ( priv->child_count *
- sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
+ if (!IA_FILE_OR_DIR (loc->inode->ia_type))
+ local->marker.call_count = 1;
+ else
+ local->marker.call_count = priv->child_count;
+ sub_volumes = alloca (local->marker.call_count *
+ sizeof (xlator_t *));
+
+ for (i = 0, trav = this->children;
+ i < local->marker.call_count;
+ i++, trav = trav->next) {
*(sub_volumes + i) = trav->xlator;
}
if (cluster_getmarkerattr (frame, this, loc, name,
- local, stripe_getxattr_unwind,
+ local,
+ stripe_getxattr_unwind,
sub_volumes,
- priv->child_count,
+ local->marker.call_count,
MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
priv->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
+
return 0;
}
}
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
+/*
+ * Returns _gf_true when @name begins with the lockinfo or the pathinfo
+ * xattr key (prefix comparison), _gf_false otherwise or when @name is
+ * NULL.
+ */
+inline gf_boolean_t
+stripe_is_special_xattr (const char *name)
+{
+        if (name == NULL)
+                return _gf_false;
+
+        if (strncmp (name, GF_XATTR_LOCKINFO_KEY,
+                     strlen (GF_XATTR_LOCKINFO_KEY)) == 0)
+                return _gf_true;
+
+        if (strncmp (name, GF_XATTR_PATHINFO_KEY,
+                     strlen (GF_XATTR_PATHINFO_KEY)) == 0)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Sends an fgetxattr for a "special" xattr to every child.
+ * stripe_vgetxattr_cbk receives each reply together with the child's
+ * index (passed as the cookie) and uses local->xsel to know which xattr
+ * is being assembled.
+ *
+ * @name must be non-NULL.
+ *
+ * Returns 0 once the requests have been wound. On failure the frame is
+ * unwound here with op_ret -1 and -1 is returned; op_errno is ENOMEM when
+ * no local could be allocated and ENAMETOOLONG when @name does not fit
+ * in local->xsel.
+ */
+int32_t
+stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                                const char *name, dict_t *xdata)
+{
+        stripe_local_t   *local    = NULL;
+        stripe_private_t *priv     = NULL;
+        int32_t           ret      = -1, op_errno = 0;
+        int               i        = 0;
+        xlator_list_t    *trav     = NULL;
+        size_t            name_len = 0;
+
+        priv = this->private;
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        /*
+         * The copy used to be bounded by strlen (name) only, so a long
+         * name could write past the end of xsel. Refuse such names
+         * rather than truncating them: a truncated selector would no
+         * longer match the key the children reply with.
+         */
+        name_len = strlen (name);
+        if (name_len >= sizeof (local->xsel)) {
+                mem_put (local);
+                op_errno = ENAMETOOLONG;
+                goto err;
+        }
+
+        local->op_ret = -1;
+        frame->local = local;
+
+        /* local came back zero-filled, so xsel stays NUL-terminated */
+        memcpy (local->xsel, name, name_len);
+        local->nallocs = local->wind_count = priv->child_count;
+
+        for (i = 0, trav = this->children; i < priv->child_count; i++,
+             trav = trav->next) {
+                STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+                                   (void *) (long) i, trav->xlator,
+                                   trav->xlator->fops->fgetxattr,
+                                   fd, name, xdata);
+        }
+
+        return 0;
+
+err:
+        STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, NULL);
+        return ret;
+}
+
+/*
+ * fgetxattr fop entry point. Special xattrs (see
+ * stripe_is_special_xattr) are gathered from every child; anything else
+ * is fetched from the first child only. Always returns 0.
+ */
+int32_t
+stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  const char *name, dict_t *xdata)
+{
+        if (!stripe_is_special_xattr (name)) {
+                STACK_WIND (frame, stripe_internal_getxattr_cbk,
+                            FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->fgetxattr,
+                            fd, name, xdata);
+        } else {
+                stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata);
+        }
+
+        return 0;
+}
+
+
+
int32_t
stripe_priv_dump (xlator_t *this)
{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
char key[GF_DUMP_MAX_BUF_LEN];
int i = 0;
stripe_private_t *priv = NULL;
@@ -4588,39 +5719,26 @@ stripe_priv_dump (xlator_t *this)
goto out;
gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.stripe","%s.priv",
- this->name);
- gf_proc_dump_build_key(key, key_prefix, "child_count");
- gf_proc_dump_write(key,"%d", priv->child_count);
+ gf_proc_dump_write("child_count","%d", priv->child_count);
for (i = 0; i < priv->child_count; i++) {
- gf_proc_dump_build_key (key, key_prefix, "subvolumes[%d]", i);
+ sprintf (key, "subvolumes[%d]", i);
gf_proc_dump_write (key, "%s.%s", priv->xl_array[i]->type,
priv->xl_array[i]->name);
}
options = priv->pattern;
while (options != NULL) {
- gf_proc_dump_build_key (key, key_prefix, "path_pattern");
- gf_proc_dump_write (key, "%s", priv->pattern->path_pattern);
-
- gf_proc_dump_build_key (key, key_prefix, "options_block_size");
- gf_proc_dump_write (key, "%ul", options->block_size);
+ gf_proc_dump_write ("path_pattern", "%s", priv->pattern->path_pattern);
+ gf_proc_dump_write ("options_block_size", "%ul", options->block_size);
options = options->next;
}
- gf_proc_dump_build_key (key, key_prefix, "block_size");
- gf_proc_dump_write (key, "%ul", priv->block_size);
-
- gf_proc_dump_build_key (key, key_prefix, "nodes_down");
- gf_proc_dump_write (key, "%d", priv->nodes_down);
-
- gf_proc_dump_build_key (key, key_prefix, "first_child_down");
- gf_proc_dump_write (key, "%d", priv->first_child_down);
-
- gf_proc_dump_build_key (key, key_prefix, "xatter_supported");
- gf_proc_dump_write (key, "%d", priv->xattr_supported);
+ gf_proc_dump_write ("block_size", "%ul", priv->block_size);
+ gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down);
+ gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down);
+ gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported);
UNLOCK (&priv->lock);
@@ -4629,36 +5747,44 @@ out:
}
+/*
+ * File-operation table of the stripe translator. Besides re-aligning the
+ * existing entries, this revision adds setxattr, fsetxattr, fgetxattr,
+ * removexattr, fremovexattr, fallocate, discard and zerofill.
+ */
struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .truncate = stripe_truncate,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .setattr = stripe_setattr,
- .fsetattr = stripe_fsetattr,
- .lookup = stripe_lookup,
- .mknod = stripe_mknod,
-
- .getxattr = stripe_getxattr,
- .readdirp = stripe_readdirp,
+ .stat = stripe_stat,
+ .unlink = stripe_unlink,
+ .rename = stripe_rename,
+ .link = stripe_link,
+ .truncate = stripe_truncate,
+ .create = stripe_create,
+ .open = stripe_open,
+ .readv = stripe_readv,
+ .writev = stripe_writev,
+ .statfs = stripe_statfs,
+ .flush = stripe_flush,
+ .fsync = stripe_fsync,
+ .ftruncate = stripe_ftruncate,
+ .fstat = stripe_fstat,
+ .mkdir = stripe_mkdir,
+ .rmdir = stripe_rmdir,
+ .lk = stripe_lk,
+ .opendir = stripe_opendir,
+ .fsyncdir = stripe_fsyncdir,
+ .setattr = stripe_setattr,
+ .fsetattr = stripe_fsetattr,
+ .lookup = stripe_lookup,
+ .mknod = stripe_mknod,
+ .setxattr = stripe_setxattr,
+ .fsetxattr = stripe_fsetxattr,
+ .getxattr = stripe_getxattr,
+ .fgetxattr = stripe_fgetxattr,
+ .removexattr = stripe_removexattr,
+ .fremovexattr = stripe_fremovexattr,
+ .readdirp = stripe_readdirp,
+ .fallocate = stripe_fallocate,
+ .discard = stripe_discard,
+ .zerofill = stripe_zerofill,
};
struct xlator_cbks cbks = {
.release = stripe_release,
+ .forget = stripe_forget,
};
struct xlator_dumpops dumpops = {
@@ -4667,8 +5793,9 @@ struct xlator_dumpops dumpops = {
struct volume_options options[] = {
{ .key = {"block-size"},
- .type = GF_OPTION_TYPE_ANY,
+ .type = GF_OPTION_TYPE_SIZE_LIST,
.default_value = "128KB",
+ .min = STRIPE_MIN_BLOCK_SIZE,
.description = "Size of the stripe unit that would be read "
"from or written to the striped servers."
},
@@ -4676,5 +5803,12 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
.default_value = "true"
},
+ { .key = {"coalesce"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Enable/Disable coalesce mode to flatten striped "
+ "files as stored on the server (i.e., eliminate holes "
+ "caused by the traditional format)."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
index 8d43a960e..5673d18f3 100644
--- a/xlators/cluster/stripe/src/stripe.h
+++ b/xlators/cluster/stripe/src/stripe.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -38,36 +29,53 @@
#include <signal.h>
#define STRIPE_PATHINFO_HEADER "STRIPE:"
-
+#define STRIPE_MIN_BLOCK_SIZE (16*GF_UNIT_KB)
#define STRIPE_STACK_UNWIND(fop, frame, params ...) do { \
stripe_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ stripe_local_wipe(__local); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define STRIPE_STACK_DESTROY(frame) do { \
+ stripe_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
if (__local) { \
- stripe_local_wipe(__local); \
- GF_FREE (__local); \
+ stripe_local_wipe (__local); \
+ mem_put (__local); \
} \
} while (0)
-#define STRIPE_STACK_DESTROY(frame) do { \
- stripe_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- if (__local) { \
- stripe_local_wipe (__local); \
- GF_FREE (__local); \
- } \
- } while (0)
+/*
+ * Validate a stripe fd context before its children are addressed.
+ *
+ * Jumps to 'label' with op_errno set to EINVAL when 'fctx' is NULL, or
+ * with op_errno set to ESTALE (after logging an error) when any of the
+ * first fctx->stripe_count entries of fctx->xl_array is NULL.  Execution
+ * falls through when every entry is set.
+ *
+ * The expansion assigns 'op_errno' and reads 'this', so both must be in
+ * scope at the call site.  'fctx' is evaluated more than once: pass an
+ * expression without side effects.
+ */
+#define STRIPE_VALIDATE_FCTX(fctx, label) do {                          \
+        int _fctx_idx = 0;                                              \
+        if (!(fctx)) {                                                  \
+                op_errno = EINVAL;                                      \
+                goto label;                                             \
+        }                                                               \
+        for (_fctx_idx = 0; _fctx_idx < (fctx)->stripe_count;           \
+             _fctx_idx++) {                                             \
+                if (!(fctx)->xl_array[_fctx_idx]) {                     \
+                        gf_log (this->name, GF_LOG_ERROR,               \
+                                "fctx->xl_array[%d] is NULL",           \
+                                _fctx_idx);                             \
+                        op_errno = ESTALE;                              \
+                        goto label;                                     \
+                }                                                       \
+        }                                                               \
+        } while (0)
typedef struct stripe_xattr_sort {
- int32_t pos;
- int32_t pathinfo_len;
- char *pathinfo;
+ int pos;
+ int xattr_len;
+ char *xattr_value;
} stripe_xattr_sort_t;
/**
@@ -90,16 +98,17 @@ struct stripe_private {
gf_lock_t lock;
uint8_t nodes_down;
int8_t first_child_down;
+ int *last_event;
int8_t child_count;
- int8_t *state; /* Current state of child node */
gf_boolean_t xattr_supported; /* default yes */
+ gf_boolean_t coalesce;
char vol_uuid[UUID_SIZE + 1];
};
/**
- * Used to keep info about the replies received from fops->readv calls
+ * Used to keep info about the replies received from readv/writev calls
*/
-struct readv_replies {
+struct stripe_replies {
struct iovec *vector;
int32_t count; //count of vector
int32_t op_ret; //op_ret of readv
@@ -111,6 +120,7 @@ struct readv_replies {
typedef struct _stripe_fd_ctx {
off_t stripe_size;
int stripe_count;
+ int stripe_coalesce;
int static_array;
xlator_t **xl_array;
} stripe_fd_ctx_t;
@@ -146,7 +156,7 @@ struct stripe_local {
blkcnt_t preparent_blocks;
blkcnt_t postparent_blocks;
- struct readv_replies *replies;
+ struct stripe_replies *replies;
struct statvfs statvfs_buf;
dir_entry_t *entry;
@@ -170,12 +180,15 @@ struct stripe_local {
loc_t loc;
loc_t loc2;
+ mode_t mode;
+ dev_t rdev;
/* For File I/O fops */
- dict_t *dict;
+ dict_t *xdata;
stripe_xattr_sort_t *xattr_list;
int32_t xattr_total_len;
int32_t nallocs;
+ char xsel[256];
struct marker_str marker;
@@ -192,12 +205,84 @@ struct stripe_local {
void *value;
struct iobref *iobref;
gf_dirent_t entries;
+ gf_dirent_t *dirent;
dict_t *xattr;
uuid_t ia_gfid;
+
+ int xflag;
+ mode_t umask;
};
typedef struct stripe_local stripe_local_t;
typedef struct stripe_private stripe_private_t;
+/*
+ * Map a child's reply frame to its position within the stripe.
+ *
+ * Scans the first fctx->stripe_count entries of fctx->xl_array for the
+ * translator recorded in prev->this.  Returns the position of the first
+ * matching entry, or -1 when no entry matches.
+ */
+static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx,
+                                             call_frame_t *prev)
+{
+        int32_t pos = 0;
+
+        while (pos < fctx->stripe_count) {
+                if (prev->this == fctx->xl_array[pos])
+                        return pos;
+                pos++;
+        }
+
+        return -1;
+}
+
+/*
+ * Copy the first 'count' translator pointers from 'src' into 'dst', one
+ * element at a time in ascending order.  Nothing is copied when 'count'
+ * is zero or negative.
+ */
+static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src,
+                                        int count)
+{
+        int n = 0;
+
+        while (n < count) {
+                dst[n] = src[n];
+                n++;
+        }
+}
+
+void stripe_local_wipe (stripe_local_t *local);
+int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,
+ stripe_local_t *local, dict_t *dict);
+void stripe_aggregate_xattr (dict_t *dst, dict_t *src);
+int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,
+ uint64_t stripe_size, uint32_t stripe_count,
+ uint32_t stripe_index,
+ uint32_t stripe_coalesce);
+int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);
+int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);
+int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to);
+int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz);
+int32_t stripe_free_xattr_str (stripe_local_t *local);
+int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,
+ int32_t *total);
+off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
+off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index);
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz);
+
+/*
+ * Rewrite buf->ia_size for a regular file when coalesce mode is in use.
+ *
+ * 'buf' is modified only when it describes a regular file and 'fctx' is
+ * non-NULL with stripe_coalesce set.  In that case the size is replaced by
+ * the result of uncoalesced_size() for the current size, the stripe size,
+ * the stripe count and the stripe index looked up from 'prev'.
+ *
+ * NOTE(review): stripe_get_frame_index() yields -1 when prev->this is not
+ * found in fctx->xl_array, and that value is forwarded unchecked - confirm
+ * that uncoalesced_size() tolerates it.
+ */
+static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx,
+                                     call_frame_t *prev)
+{
+        int child_idx;
+
+        if (IA_ISREG(buf->ia_type) && fctx && fctx->stripe_coalesce) {
+                child_idx = stripe_get_frame_index(fctx, prev);
+                buf->ia_size = uncoalesced_size(buf->ia_size,
+                                                fctx->stripe_size,
+                                                fctx->stripe_count,
+                                                child_idx);
+        }
+}
#endif /* _STRIPE_H_ */
diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am
deleted file mode 100644
index 2a1fe8372..000000000
--- a/xlators/cluster/unify/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-
-xlator_LTLIBRARIES = unify.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/legacy/cluster
-
-unify_la_LDFLAGS = -module -avoidversion
-
-unify_la_SOURCES = unify.c unify-self-heal.c
-unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = unify.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/unify/src/unify-mem-types.h b/xlators/cluster/unify/src/unify-mem-types.h
deleted file mode 100644
index 13c9cc1f7..000000000
--- a/xlators/cluster/unify/src/unify-mem-types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __UNIFY_MEM_TYPES_H__
-#define __UNIFY_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_unify_mem_types_ {
- gf_unify_mt_char = gf_common_mt_end + 1,
- gf_unify_mt_int16_t,
- gf_unify_mt_xlator_t,
- gf_unify_mt_unify_private_t,
- gf_unify_mt_xlator_list_t,
- gf_unify_mt_dir_entry_t,
- gf_unify_mt_off_t,
- gf_unify_mt_int,
- gf_unify_mt_unify_self_heal_struct,
- gf_unify_mt_unify_local_t,
- gf_unify_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c
deleted file mode 100644
index f99e4c7c3..000000000
--- a/xlators/cluster/unify/src/unify-self-heal.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * unify-self-heal.c :
- * This file implements few functions which enables 'unify' translator
- * to be consistent in its behaviour when
- * > a node fails,
- * > a node gets added,
- * > a failed node comes back
- * > a new namespace server is added (ie, an fresh namespace server).
- *
- * This functionality of 'unify' will enable glusterfs to support storage
- * system failure, and maintain consistancy. This works both ways, ie, when
- * an entry (either file or directory) is found on namespace server, and not
- * on storage nodes, its created in storage nodes and vica-versa.
- *
- * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()'
- *
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "common-utils.h"
-
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
-
- if (local->sh_struct) {
- if (local->sh_struct->offset_list)
- GF_FREE (local->sh_struct->offset_list);
-
- if (local->sh_struct->entry_list)
- GF_FREE (local->sh_struct->entry_list);
-
- if (local->sh_struct->count_list)
- GF_FREE (local->sh_struct->count_list);
-
- GF_FREE (local->sh_struct);
- }
-
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-int32_t
-unify_sh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents on
- * storagenodes is still pending.
- */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more entries
- to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so, it
- can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_sh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_sh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
- }
- UNLOCK (&frame->lock);
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_sh_getdents_cbk -
- */
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_sh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_sh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_sh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_WARNING, "failed");
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- * track of offset sent to each node during
- * STACK_WIND.
- */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
-
- /* did stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
- /* Only 'self-heal' failed, lookup() was successful. */
- local->op_ret = 0;
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
- &local->stbuf, local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * gf_sh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_sh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in
- only one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be
- same accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at directory level */
- local->call_count = 0;
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
-
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->stbuf,
- local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/* Foreground self-heal part over */
-
-/* Background self-heal part */
-
-int32_t
-unify_bgsh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents
- on storagenodes is still pending. */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more
- entries to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- fd_unref (local->fd);
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so,
- it can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_bgsh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_bgsh_getdents_cbk -
- */
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_bgsh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
-
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_bgsh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_bgsh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int32_t callcnt = 0;
- int16_t index = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
- callcnt = local->call_count;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- track of offset sent to each node during
- STACK_WIND. */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
- /* did a stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- fd_unref (local->fd);
-
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/**
- * gf_bgsh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_bgsh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in only
- one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be same
- accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at the directory level */
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
-
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/* Background self-heal part over */
-
-
-
-
-/**
- * zr_unify_self_heal -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local)
-{
- unify_private_t *priv = this->private;
- call_frame_t *bg_frame = NULL;
- unify_local_t *bg_local = NULL;
- inode_t *tmp_inode = NULL;
- dict_t *tmp_dict = NULL;
- int16_t index = 0;
-
- if (local->inode_generation < priv->inode_generation) {
- /* Any self heal will be done at the directory level */
- /* Update the inode's generation to the current generation
- value. */
- local->inode_generation = priv->inode_generation;
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->inode_generation);
-
- if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) {
- local->op_ret = 0;
- local->failed = 0;
- local->call_count = priv->child_count + 1;
- local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &local->loc1,
- 0);
- }
-
- /* Self-heal in foreground, hence no need
- to UNWIND here */
- return 0;
- }
-
- /* Self Heal done in background */
- bg_frame = copy_frame (frame);
- INIT_LOCAL (bg_frame, bg_local);
- loc_copy (&bg_local->loc1, &local->loc1);
- bg_local->op_ret = 0;
- bg_local->failed = 0;
- bg_local->call_count = priv->child_count + 1;
- bg_local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (bg_frame,
- unify_bgsh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &bg_local->loc1,
- 0);
- }
- }
-
- /* generation number matches, self heal already done or
- * self heal done in background: just do STACK_UNWIND
- */
- tmp_inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- tmp_inode,
- &local->stbuf,
- local->dict,
- &local->oldpostparent);
-
- if (tmp_dict)
- dict_unref (tmp_dict);
-
- return 0;
-}
-
diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c
deleted file mode 100644
index 6dc93083d..000000000
--- a/xlators/cluster/unify/src/unify.c
+++ /dev/null
@@ -1,4589 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * xlators/cluster/unify:
- * - This xlator is one of the main translator in GlusterFS, which
- * actually does the clustering work of the file system. One need to
- * understand that, unify assumes file to be existing in only one of
- * the child node, and directories to be present on all the nodes.
- *
- * NOTE:
- * Now, unify has support for global namespace, which is used to keep a
- * global view of fs's namespace tree. The stat for directories are taken
- * just from the namespace, where as for files, just 'ia_ino' is taken from
- * Namespace node, and other stat info is taken from the actual storage node.
- * Also Namespace node helps to keep consistant inode for files across
- * glusterfs (re-)mounts.
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include <signal.h>
-#include <libgen.h>
-#include "compat-errno.h"
-#include "compat.h"
-
-#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \
- if (!(_loc && _loc->inode)) { \
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-
-#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \
- if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \
- if (!_fd) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-
-
-/*
- * unify_normalize_stats -
- */
-void
-unify_normalize_stats (struct statvfs *buf,
- unsigned long bsize,
- unsigned long frsize)
-{
- double factor;
-
- if (buf->f_bsize != bsize) {
- factor = ((double) buf->f_bsize) / bsize;
- buf->f_bsize = bsize;
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- }
-}
-
-
-xlator_t *
-unify_loc_subvol (loc_t *loc, xlator_t *this)
-{
- unify_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int16_t *list = NULL;
- long index = 0;
- xlator_t *subvol_i = NULL;
- int ret = 0;
- uint64_t tmp_list = 0;
-
- priv = this->private;
- subvol = NS (this);
-
- if (!IA_ISDIR (loc->inode->ia_type)) {
- ret = inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
- if (!list)
- goto out;
-
- for (index = 0; list[index] != -1; index++) {
- subvol_i = priv->xl_array[list[index]];
- if (subvol_i != NS (this)) {
- subvol = subvol_i;
- break;
- }
- }
- }
-out:
- return subvol;
-}
-
-
-
-/**
- * unify_statfs_cbk -
- */
-int32_t
-unify_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *stbuf)
-{
- int32_t callcnt = 0;
- struct statvfs *dict_buf = NULL;
- unsigned long bsize;
- unsigned long frsize;
- unify_local_t *local = (unify_local_t *)frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- /* when a call is successfull, add it to local->dict */
- dict_buf = &local->statvfs_buf;
-
- if (dict_buf->f_bsize != 0) {
- bsize = max (dict_buf->f_bsize,
- stbuf->f_bsize);
-
- frsize = max (dict_buf->f_frsize,
- stbuf->f_frsize);
- unify_normalize_stats(dict_buf, bsize, frsize);
- unify_normalize_stats(stbuf, bsize, frsize);
- } else {
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- }
-
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = op_ret;
- } else {
- /* fop on storage node has failed due to some error */
- if (op_errno != ENOTCONN) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): %s",
- prev_frame->this->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs_buf);
- }
-
- return 0;
-}
-
-/**
- * unify_statfs -
- */
-int32_t
-unify_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- xlator_list_t *trav = this->children;
-
- INIT_LOCAL (frame, local);
- local->call_count = ((unify_private_t *)this->private)->child_count;
-
- while(trav) {
- STACK_WIND (frame,
- unify_statfs_cbk,
- trav->xlator,
- trav->xlator->fops->statfs,
- loc);
- trav = trav->next;
- }
-
- return 0;
-}
-
-/**
- * unify_buf_cbk -
- */
-int32_t
-unify_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = buf->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (buf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!IA_ISDIR (buf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-#define check_if_dht_linkfile(s) \
- ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) == S_ISVTX)
-
-/**
- * unify_lookup_cbk -
- */
-int32_t
-unify_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
- dict_t *local_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if (local->revalidate &&
- (op_errno == ESTALE)) {
- /* ESTALE takes priority */
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (local->op_errno != ESTALE)) {
- /* if local->op_errno is already ESTALE, then
- * ESTALE has to propogated to the parent first.
- * do not enter here.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
-
- } else if (local->revalidate &&
- (local->op_errno != ESTALE) &&
- !(priv->optimist && (op_errno == ENOENT))) {
-
- gf_log (this->name,
- (op_errno == ENOTCONN) ?
- GF_LOG_DEBUG:GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (check_if_dht_linkfile(buf)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "file %s may be DHT link file on %s, "
- "make sure the backend is not shared "
- "between unify and DHT",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- }
-
- if (local->stbuf.ia_type && local->stbuf.ia_blksize) {
- /* make sure we already have a stbuf
- stored in local->stbuf */
- if (IA_ISDIR (local->stbuf.ia_type) &&
- !IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on namespace, non-directory "
- "on node '%s', returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- if (!IA_ISDIR (local->stbuf.ia_type) &&
- IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on node '%s', non-directory "
- "on namespace, returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- }
-
- if (!local->revalidate && !IA_ISDIR (buf->ia_type)) {
- /* This is the first time lookup on file*/
- if (!local->list) {
- /* list is not allocated, allocate
- the max possible range */
- local->list = GF_CALLOC (1, 2 * (priv->child_count + 2),
- gf_unify_mt_int16_t);
- if (!local->list) {
- gf_log (this->name,
- GF_LOG_CRITICAL,
- "Not enough memory");
- STACK_UNWIND (frame, -1,
- ENOMEM, inode,
- NULL, NULL, NULL);
- return 0;
- }
- }
- /* update the index of the list */
- local->list [local->index++] =
- (int16_t)(long)cookie;
- }
-
- if (!local->revalidate && IA_ISDIR (buf->ia_type)) {
- /* fresh lookup of a directory */
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- if ((!local->dict) && dict &&
- (priv->xl_array[(long)cookie] != NS(this))) {
- local->dict = dict_ref (dict);
- }
-
- /* index of NS node is == total child count */
- if (priv->child_count == (int16_t)(long)cookie) {
- /* Take the inode number from namespace */
- local->ia_ino = buf->ia_ino;
- if (IA_ISDIR (buf->ia_type) ||
- !(local->stbuf.ia_blksize)) {
- local->stbuf = *buf;
- local->oldpostparent = *postparent;
- }
- } else if (!IA_ISDIR (buf->ia_type)) {
- /* If file, then get the stat from
- storage node */
- local->stbuf = *buf;
- }
-
- if (local->ia_nlink < buf->ia_nlink) {
- local->ia_nlink = buf->ia_nlink;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_dict = local->dict;
- if (local->return_eio) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] Unable to fix the path (%s) with "
- "self-heal, try manual verification. "
- "returning EIO.", local->loc1.path);
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL);
- if (local_dict) {
- dict_unref (local_dict);
- }
- return 0;
- }
-
- if (!local->stbuf.ia_blksize) {
- /* Inode not present */
- local->op_ret = -1;
- } else {
- if (!local->revalidate &&
- !IA_ISDIR (local->stbuf.ia_type)) {
- /* If its a file, big array is useless,
- allocate the smaller one */
- int16_t *list = NULL;
- list = GF_CALLOC (1, 2 * (local->index + 1),
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- memcpy (list, local->list, 2 * local->index);
- /* Make the end of the list as -1 */
- GF_FREE (local->list);
- local->list = list;
- local->list [local->index] = -1;
- /* Update the inode's ctx with proper array */
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
- }
-
- if (IA_ISDIR(local->loc1.inode->ia_type)) {
- /* lookup is done for directory */
- if (local->failed && priv->self_heal) {
- /* Triggering self-heal */
- /* means, self-heal required for this
- inode */
- local->inode_generation = 0;
- priv->inode_generation++;
- }
- } else {
- local->stbuf.ia_ino = local->ia_ino;
- }
-
- local->stbuf.ia_nlink = local->ia_nlink;
- }
- if (local->op_ret == -1) {
- if (!local->revalidate && local->list)
- GF_FREE (local->list);
- }
-
- if ((local->op_ret >= 0) && local->failed &&
- local->revalidate) {
- /* Done revalidate, but it failed */
- if ((op_errno != ENOTCONN)
- && (local->op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Revalidate failed for path(%s): %s",
- local->loc1.path, strerror (op_errno));
- }
- local->op_ret = -1;
- }
-
- if ((priv->self_heal && !priv->optimist) &&
- (!local->revalidate && (local->op_ret == 0) &&
- IA_ISDIR(local->stbuf.ia_type))) {
- /* Let the self heal be done here */
- zr_unify_self_heal (frame, this, local);
- local_dict = NULL;
- } else {
- if (local->failed) {
- /* NOTE: directory lookup is sent to all
- * subvolumes and success from a subvolume
- * might set local->op_ret to 0 (zero) */
- local->op_ret = -1;
- }
-
- /* either no self heal, or op_ret == -1 (failure) */
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- }
- if (local_dict) {
- dict_unref (local_dict);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_lookup -
- */
-int32_t
-unify_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- long index = 0;
-
- if (!(loc && loc->inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: Argument not right", loc?loc->path:"(null)");
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL, NULL);
- return 0;
- }
-
- if (inode_ctx_get (loc->inode, this, NULL)
- && IA_ISDIR (loc->inode->ia_type)) {
- local->revalidate = 1;
- }
-
- if (!inode_ctx_get (loc->inode, this, NULL) &&
- loc->inode->ia_type &&
- !IA_ISDIR (loc->inode->ia_type)) {
- uint64_t tmp_list = 0;
- /* check if revalidate or fresh lookup */
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
- }
-
- if (local->list) {
- list = local->list;
- for (index = 0; list[index] != -1; index++);
- if (index != 2) {
- if (index < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ESTALE for %s: file "
- "count is %ld", loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, ESTALE,
- NULL, NULL, NULL, NULL);
- return 0;
- } else {
- /* There are more than 2 presences */
- /* Just log and continue */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: file count is %ld",
- loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- }
- }
-
- /* is revalidate */
- local->revalidate = 1;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)(long)list[index], //cookie
- priv->xl_array [list[index]],
- priv->xl_array [list[index]]->fops->lookup,
- loc,
- xattr_req);
- if (need_break)
- break;
- }
- } else {
- if (loc->inode->ia_type) {
- if (inode_ctx_get (loc->inode, this, NULL)) {
- inode_ctx_get (loc->inode, this,
- &local->inode_generation);
- }
- }
- /* This is first call, there is no list */
- /* call count should be all child + 1 namespace */
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index <= priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- loc,
- xattr_req);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_stat - if directory, get the stat directly from NameSpace child.
- * if file, check for a hint and send it only there (also to NS).
- * if its a fresh stat, then do it on all the nodes.
- *
- * NOTE: for all the call, sending cookie as xlator pointer, which will be
- * used in cbk.
- */
-int32_t
-unify_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
- local->ia_ino = loc->inode->ino;
- if (IA_ISDIR (loc->inode->ia_type)) {
- /* Directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->stat, loc);
- } else {
- /* File */
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->stat,
- loc);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_access_cbk -
- */
-int32_t
-unify_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_access - Send request to only namespace, which has all the
- * attributes set for the file.
- */
-int32_t
-unify_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame,
- unify_access_cbk,
- NS(this),
- NS(this)->fops->access,
- loc,
- mask);
-
- return 0;
-}
-
-int32_t
-unify_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if ((op_ret == -1) && !(priv->optimist &&
- (op_errno == ENOENT ||
- op_errno == EEXIST))) {
- /* TODO: Decrement the inode_generation of
- * this->inode's parent inode, hence the missing
- * directory is created properly by self-heal.
- * Currently, there is no way to get the parent
- * inode directly.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- if (op_errno != EEXIST)
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = 0;
-
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->failed) {
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_mkdir_cbk -
- */
-int32_t
-unify_ns_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- long index = 0;
-
- if (op_ret == -1) {
- /* No need to send mkdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->name, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->call_count = priv->child_count;
-
- /* Send mkdir request to all the nodes now */
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_mkdir_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->mkdir,
- &local->loc1,
- local->mode);
- }
-
- return 0;
-}
-
-
-/**
- * unify_mkdir -
- */
-int32_t
-unify_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
-
- loc_copy (&local->loc1, loc);
-
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mkdir_cbk,
- NS(this),
- NS(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-/**
- * unify_rmdir_cbk -
- */
-int32_t
-unify_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT)))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_rmdir_cbk -
- */
-int32_t
-unify_ns_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* No need to send rmdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name,
- ((op_errno != ENOTEMPTY) ?
- GF_LOG_ERROR : GF_LOG_DEBUG),
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
- return 0;
- }
-
- local->call_count = priv->child_count;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rmdir_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rmdir,
- &local->loc1);
- }
-
- return 0;
-}
-
-/**
- * unify_rmdir -
- */
-int32_t
-unify_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_rmdir_cbk,
- NS(this),
- NS(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-/**
- * unify_open_cbk -
- */
-int32_t
-unify_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in
- all the f*** / FileIO calls */
- fd_ctx_set (fd, this, (uint64_t)(long)cookie);
- }
- }
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if ((local->failed == 1) && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- //local->op_errno = EIO;
-
- if (!fd_ctx_get (local->fd, this, NULL)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on child node, "
- "failed on namespace");
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on namespace, "
- "failed on child node");
- }
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
-
- return 0;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_open_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->index++;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->list[0] = (int16_t)(long)cookie;
- } else {
- local->list[1] = (int16_t)(long)cookie;
- }
- if (IA_ISDIR (buf->ia_type))
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- file_list[0] = local->list[0];
- file_list[1] = local->list[1];
- file_list[2] = -1;
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->name, local->index);
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning as file found on less "
- "than 2 nodes");
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- return 0;
- }
- }
-
- if (local->failed) {
- /* Open on directory, return EISDIR */
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EISDIR, local->fd);
- return 0;
- }
-
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, local->wbflags);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_open_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- STACK_UNWIND (frame, -1, ENOENT);
- return 0;
- }
-
- if (path[0] == '/') {
- local->name = gf_strdup (path);
- ERR_ABORT (local->name);
- } else {
- char *tmp_str = gf_strdup (local->loc1.path);
- char *tmp_base = dirname (tmp_str);
- local->name = GF_CALLOC (1, ZR_PATH_MAX, gf_unify_mt_char);
- strcpy (local->name, tmp_base);
- strncat (local->name, "/", 1);
- strcat (local->name, path);
- GF_FREE (tmp_str);
- }
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send the lookup to all the nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_open_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
-
- return 0;
-}
-#endif /* GF_DARWIN_HOST_OS */
-
-/**
- * unify_open -
- */
-int32_t
-unify_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- int32_t wbflags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t file_list[3] = {0,};
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Init */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->fd = fd;
- local->flags = flags;
- local->wbflags = wbflags;
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- local->list = list;
- file_list[0] = priv->child_count; /* Thats namespace */
- file_list[2] = -1;
- for (index = 0; list[index] != -1; index++) {
- local->call_count++;
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->call_count != 2) {
- /* If the lookup was done for file */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: entry_count is %d",
- loc->path, local->call_count);
- for (index = 0; local->list[index] != -1; index++)
- gf_log (this->name, GF_LOG_ERROR, "%s: found on %s",
- loc->path, priv->xl_array[list[index]]->name);
-
- if (local->call_count < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on onlyone node");
- STACK_UNWIND (frame, -1, EIO, fd);
- return 0;
- }
- }
-
-#ifdef GF_DARWIN_HOST_OS
- /* Handle symlink here */
- if (IA_ISLNK (loc->inode->ia_type)) {
- /* Callcount doesn't matter here */
- STACK_WIND (frame,
- unify_open_readlink_cbk,
- NS(this),
- NS(this)->fops->readlink,
- loc, ZR_PATH_MAX);
- return 0;
- }
-#endif /* GF_DARWIN_HOST_OS */
-
- local->call_count = 2;
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]], //cookie
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- loc,
- flags,
- fd, wbflags);
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-
-int32_t
-unify_create_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- inode_t *inode = local->loc1.inode;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_create_open_cbk -
- */
-int32_t
-unify_create_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int ret = 0;
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_value = 0;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in all
- the f*** / FileIO calls */
- /* TODO: log on failure */
- ret = fd_ctx_get (fd, this, &tmp_value);
- cookie = (void *)(long)tmp_value;
- } else {
- /* NOTE: open successful on namespace.
- * fd's ctx can be used to identify open
- * failure on storage subvolume. cool
- * ide ;) */
- local->failed = 0;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- ((xlator_t *)cookie)->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed == 1 && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- local->op_errno = EIO;
- local->fd = fd;
- local->call_count = 1;
-
- if (!fd_ctx_get (local->fd, this, &tmp_value)) {
- child = (xlator_t *)(long)tmp_value;
-
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on child node, "
- "failed on namespace");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- child,
- child->fops->unlink,
- &local->loc1);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on namespace, "
- "failed on child node");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- }
- return 0;
- }
- inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
- return 0;
-}
-
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_create_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->list[local->index++] = (int16_t)(long)cookie;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->ia_ino = buf->ia_ino;
- } else {
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t *list = local->list;
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- local->list [local->index] = -1;
- file_list[0] = list[0];
- file_list[1] = list[1];
- file_list[2] = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->loc1.path, local->index);
- file_list[0] = priv->child_count;
- for (index = 0; list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", local->loc1.path,
- priv->xl_array[list[index]]->name);
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on "
- "only one node");
- STACK_UNWIND (frame, -1, EIO,
- local->fd, inode, NULL,
- NULL, NULL);
- return 0;
- }
- }
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_create_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, 0);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_create_cbk -
- */
-int32_t
-unify_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- inode_t *tmp_inode = NULL;
-
- if (op_ret == -1) {
- /* send unlink () on Namespace */
- local->op_errno = op_errno;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "create failed on %s (file %s, error %s), "
- "sending unlink to namespace",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->stbuf = *buf;
- /* Just inode number should be from NS node */
- local->stbuf.ia_ino = local->ia_ino;
-
- /* TODO: log on failure */
- ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_create_cbk -
- *
- */
-int32_t
-unify_ns_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send create request to other servers, as
- namespace action failed. Handle exclusive create here. */
- if ((op_errno != EEXIST) ||
- ((op_errno == EEXIST) &&
- ((local->flags & O_EXCL) == O_EXCL))) {
- /* If its just a create call without O_EXCL,
- don't do this */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
- }
- }
-
- if (op_ret >= 0) {
- /* Get the inode number from the NS node */
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->op_ret = -1;
-
- /* Start the mapping list */
- list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- inode_ctx_put (inode, this, (uint64_t)(long)list);
- list[0] = priv->child_count;
- list[2] = -1;
-
- /* This means, file doesn't exist anywhere in the Filesystem */
- sched_ops = priv->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (sched_xl == NULL)
- {
- /* send unlink () on Namespace */
- local->op_errno = ENOTCONN;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "no node online to schedule create:(file %s) "
- "sending unlink to namespace",
- (local->loc1.path)?local->loc1.path:"");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_create_cbk,
- sched_xl, sched_xl->fops->create,
- &local->loc1, local->flags, local->mode, fd);
- } else {
- /* File already exists, and there is no O_EXCL flag */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "File(%s) already exists on namespace, sending "
- "open instead", local->loc1.path);
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send lookup() to all nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_create_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
- }
- return 0;
-}
-
-/**
- * unify_create - create a file in global namespace first, so other
- * clients can see them. Create the file in storage nodes in background.
- */
-int32_t
-unify_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->flags = flags;
- local->fd = fd;
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_create_cbk,
- NS(this),
- NS(this)->fops->create,
- loc,
- flags | O_EXCL,
- mode,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_opendir_cbk -
- */
-int32_t
-unify_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- return 0;
-}
-
-/**
- * unify_opendir -
- */
-int32_t
-unify_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame, unify_opendir_cbk,
- NS(this), NS(this)->fops->opendir, loc, fd);
-
- return 0;
-}
-
-
-int32_t
-unify_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = statpost->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (statpost->ia_type) ||
- !local->stpost.ia_blksize) {
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
-
- if ((!IA_ISDIR (statpost->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stpre.ia_ino = local->ia_ino;
- local->stpost.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stpre, &local->stpost);
- }
-
- return 0;
-}
-
-
-int32_t
-unify_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- if (!(loc && loc->inode)) {
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setattr_cbk,
- NS (this),
- NS (this)->fops->setattr,
- loc, stbuf, valid);
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_setattr_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->setattr,
- loc, stbuf, valid);
-
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_setattr_cbk, child,
- child->fops->fsetattr, fd, stbuf, valid);
-
- STACK_WIND (frame, unify_setattr_cbk, NS(this),
- NS(this)->fops->fsetattr, fd, stbuf, valid);
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, unify_setattr_cbk,
- NS(this), NS(this)->fops->fsetattr,
- fd, stbuf, valid);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate_cbk -
- */
-int32_t
-unify_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- local->op_errno = op_errno;
- if (!((op_errno == ENOENT) && priv->optimist))
- local->op_ret = -1;
- }
-
- if (op_ret >= 0) {
- if (NS (this) == prev_frame->this) {
- local->ia_ino = postbuf->ia_ino;
- /* If the entry is directory, get the
- stat from NS node */
- if (IA_ISDIR (postbuf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
-
- if ((!IA_ISDIR (postbuf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from
- Storage node. */
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->ia_ino) {
- local->stbuf.ia_ino = local->ia_ino;
- local->poststbuf.ia_ino = local->ia_ino;
- } else {
- local->op_ret = -1;
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->poststbuf);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate -
- */
-int32_t
-unify_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->ia_ino = loc->inode->ino;
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_truncate_cbk,
- NS(this),
- NS(this)->fops->truncate,
- loc,
- 0);
- } else {
- local->op_ret = 0;
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- /* Don't send offset to NS truncate */
- STACK_WIND (frame, unify_truncate_cbk, NS(this),
- NS(this)->fops->truncate, loc, 0);
- callcnt--;
-
- for (index = 0; local->list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[local->list[index]]) {
- STACK_WIND (frame,
- unify_truncate_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->truncate,
- loc,
- offset);
- if (!--callcnt)
- break;
- }
- }
- }
-
- return 0;
-}
-
-/**
- * unify_readlink_cbk -
- */
-int32_t
-unify_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
- return 0;
-}
-
-/**
- * unify_readlink - Read the link only from the storage node.
- */
-int32_t
-unify_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- unify_private_t *priv = this->private;
- int32_t entry_count = 0;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- entry_count++;
-
- if (entry_count >= 2) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_readlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->readlink,
- loc,
- size);
- break;
- }
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ENOENT, no softlink files found "
- "on storage node");
- STACK_UNWIND (frame, -1, ENOENT, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink_cbk -
- */
-int32_t
-unify_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- if (((call_frame_t *)cookie)->this == NS(this)) {
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink -
- */
-int32_t
-unify_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_unlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->unlink,
- loc);
- if (need_break)
- break;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: returning ENOENT", loc->path);
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_readv_cbk -
- */
-int32_t
-unify_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-/**
- * unify_readv -
- */
-int32_t
-unify_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_readv_cbk,
- child,
- child->fops->readv,
- fd,
- size,
- offset);
-
-
- return 0;
-}
-
-/**
- * unify_writev_cbk -
- */
-int32_t
-unify_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- unify_local_t *local = NULL;
-
- local = frame->local;
-
- local->stbuf = *prebuf;
- local->stbuf.ia_ino = local->ia_ino;
-
- local->poststbuf = *postbuf;
- local->poststbuf.ia_ino = local->ia_ino;
-
- STACK_UNWIND (frame, op_ret, op_errno,
- &local->stbuf, &local->poststbuf);
- return 0;
-}
-
-/**
- * unify_writev -
- */
-int32_t
-unify_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
- unify_local_t *local = NULL;
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_writev_cbk,
- child,
- child->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
-
- return 0;
-}
-
-/**
- * unify_ftruncate -
- */
-int32_t
-unify_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- xlator_t *child = NULL;
- unify_local_t *local = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->op_ret = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_truncate_cbk,
- child, child->fops->ftruncate,
- fd, offset);
-
- STACK_WIND (frame, unify_truncate_cbk,
- NS(this), NS(this)->fops->ftruncate,
- fd, 0);
-
- return 0;
-}
-
-
-/**
- * unify_flush_cbk -
- */
-int32_t
-unify_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_flush -
- */
-int32_t
-unify_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_flush_cbk, child,
- child->fops->flush, fd);
-
- return 0;
-}
-
-
-/**
- * unify_fsync_cbk -
- */
-int32_t
-unify_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-/**
- * unify_fsync -
- */
-int32_t
-unify_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fsync_cbk, child,
- child->fops->fsync, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_fstat - Send fstat FOP to Namespace only if its directory, and to
- * both namespace and the storage node if its a file.
- */
-int32_t
-unify_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fstat, fd);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
- }
-
- return 0;
-}
-
-/**
- * unify_getdents_cbk -
- */
-int32_t
-unify_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- return 0;
-}
-
-/**
- * unify_getdents - send the FOP request to all the nodes.
- */
-int32_t
-unify_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_getdents_cbk, NS(this),
- NS(this)->fops->getdents, fd, size, offset, flag);
-
- return 0;
-}
-
-
-/**
- * unify_readdir_cbk -
- */
-int32_t
-unify_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-/**
- * unify_readdir - send the FOP request to all the nodes.
- */
-int32_t
-unify_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdir_cbk, NS(this),
- NS(this)->fops->readdir, fd, size, offset);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdirp_cbk, NS(this),
- NS(this)->fops->readdirp, fd, size, offset);
-
- return 0;
-}
-
-
-/**
- * unify_fsyncdir_cbk -
- */
-int32_t
-unify_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/**
- * unify_fsyncdir -
- */
-int32_t
-unify_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_fsyncdir_cbk,
- NS(this), NS(this)->fops->fsyncdir, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_lk_cbk - UNWIND frame with the proper return arguments.
- */
-int32_t
-unify_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-/**
- * unify_lk - Send it to all the storage nodes, (should be 1) which has file.
- */
-int32_t
-unify_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_lk_cbk, child,
- child->fops->lk, fd, cmd, lock);
-
- return 0;
-}
-
-
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-static int32_t
-unify_setxattr_file_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_private_t *private = this->private;
- unify_local_t *local = frame->local;
- xlator_t *sched_xl = NULL;
- struct sched_ops *sched_ops = NULL;
-
- if (op_ret == -1) {
- if (!ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr with XATTR_CREATE on ns: "
- "path(%s) key(%s): %s",
- local->loc1.path, local->name,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- LOCK (&frame->lock);
- {
- local->failed = 0;
- local->op_ret = 0;
- local->op_errno = 0;
- local->call_count = 1;
- }
- UNLOCK (&frame->lock);
-
- /* schedule XATTR_CREATE on one of the child node */
- sched_ops = private->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->name);
- if (!sched_xl) {
- STACK_UNWIND (frame, -1, ENOTCONN);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_setxattr_cbk,
- sched_xl,
- sched_xl->fops->setxattr,
- &local->loc1,
- local->dict,
- local->flags);
- return 0;
-}
-
-/**
- * unify_setxattr_cbk - When all the child nodes return, UNWIND frame.
- */
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- dict_t *dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, (((op_errno == ENOENT) ||
- (op_errno == ENOTSUP))?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- if (local->failed == -1) {
- local->failed = 1;
- }
- local->op_errno = op_errno;
- } else {
- local->failed = 0;
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed && local->name &&
- ZR_FILE_CONTENT_REQUEST(local->name)) {
- dict = get_new_dict ();
- dict_set (dict, local->dict->members_list->key,
- data_from_dynptr(NULL, 0));
- dict_ref (dict);
-
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setxattr_file_cbk,
- NS(this),
- NS(this)->fops->setxattr,
- &local->loc1,
- dict,
- XATTR_CREATE);
-
- dict_unref (dict);
- return 0;
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_sexattr - This function should be sent to all the storage nodes,
- * which contains the file, (excluding namespace).
- */
-int32_t
-unify_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
- data_pair_t *trav = dict->members_list;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->failed = -1;
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
-
- if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
- /* direct the storage xlators to change file
- content only if file exists */
- local->flags = flags;
- local->dict = dict;
- local->name = gf_strdup (trav->key);
- flags |= XATTR_REPLACE;
- }
-
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setxattr,
- loc, dict, flags);
- }
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->setxattr,
- loc,
- dict,
- flags);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- /* No entry in storage nodes */
- gf_log (this->name, GF_LOG_DEBUG,
- "returning ENOENT, file not found on storage node.");
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-/**
- * unify_getxattr_cbk - This function is called from only one child, so, no
- * need of any lock or anything else, just send it to above layer
- */
-int32_t
-unify_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value)
-{
- int32_t callcnt = 0;
- dict_t *local_value = NULL;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name,
- (((op_errno == ENOENT) ||
- (op_errno == ENODATA) ||
- (op_errno == ENOTSUP)) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- } else {
- if (!local->dict)
- local->dict = dict_ref (value);
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_value = local->dict;
- local->dict = NULL;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local_value);
-
- if (local_value)
- dict_unref (local_value);
- }
-
- return 0;
-}
-
-
-/**
- * unify_getxattr - This FOP is sent to only the storage node.
- */
-int32_t
-unify_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t count = 0;
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getxattr,
- loc,
- name);
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- count++;
- }
- }
-
- if (count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->getxattr,
- loc,
- name);
- if (!--count)
- break;
- }
- }
- } else {
- dict_t *tmp_dict = get_new_dict ();
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENODATA, no file found on storage node",
- loc->path);
- STACK_UNWIND (frame, -1, ENODATA, tmp_dict);
- dict_destroy (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr_cbk - Wait till all the child node returns the call
- * and then UNWIND to above layer.
- */
-int32_t
-unify_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- if (op_errno != ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
- } else {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr - Send it to all the child nodes which has the files.
- */
-int32_t
-unify_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->removexattr,
- loc,
- name);
-
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->removexattr,
- loc,
- name);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENOENT, not found on storage node.", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-int32_t
-unify_mknod_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- /* No log required here as this -1 is for mknod call */
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_mknod_cbk -
- */
-int32_t
-unify_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, sending unlink to "
- "namespace");
- local->op_errno = op_errno;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- return 0;
-}
-
-/**
- * unify_ns_mknod_cbk -
- */
-int32_t
-unify_ns_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- /* No need to send mknod request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name, local->loc1.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count;
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send mknod request to scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, no node online "
- "at the moment, sending unlink to NS");
- local->op_errno = ENOTCONN;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_mknod_cbk,
- sched_xl, sched_xl->fops->mknod,
- &local->loc1, local->mode, local->dev);
-
- return 0;
-}
-
-/**
- * unify_mknod - Create a device on namespace first, and later create on
- * the storage node.
- */
-int32_t
-unify_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->dev = rdev;
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mknod_cbk,
- NS(this),
- NS(this)->fops->mknod,
- loc,
- mode,
- rdev);
-
- return 0;
-}
-
-int32_t
-unify_symlink_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_symlink_cbk -
- */
-int32_t
-unify_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, sending unlink "
- "to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_symlink_cbk -
- */
-int32_t
-unify_ns_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
-
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- int16_t *list = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send symlink request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Start the mapping list */
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count; //namespace's index
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send symlink request to all the nodes now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, no node online, "
- "sending unlink to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame,
- unify_symlink_cbk,
- sched_xl,
- sched_xl->fops->symlink,
- local->name,
- &local->loc1);
-
- return 0;
-}
-
-/**
- * unify_symlink -
- */
-int32_t
-unify_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->name = gf_strdup (linkpath);
-
- if ((local->name == NULL) ||
- (local->loc1.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_symlink_cbk,
- NS(this),
- NS(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-
-int32_t
-unify_rename_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- }
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
-}
-
-int32_t
-unify_ns_rename_undo_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- }
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
-}
-
-int32_t
-unify_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (!IA_ISDIR (buf->ia_type))
- local->stbuf = *buf;
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- if (IA_ISDIR (local->loc1.inode->ia_type)) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->oldpreparent,
- &local->oldpostparent, &local->newpreparent,
- &local->newpostparent);
- return 0;
- }
-
- if (local->op_ret == -1) {
- /* TODO: check this logic */
-
- /* Rename failed in storage node, successful on NS,
- * hence, rename back the entries in NS */
- /* NOTE: this will be done only if the destination
- * doesn't exists, if the destination exists, the
- * job of correcting NS is left to self-heal
- */
- if (!local->index) {
- loc_t tmp_oldloc = {
- /* its actual 'newloc->path' */
- .path = local->loc2.path,
- .inode = local->loc1.inode,
- .parent = local->loc2.parent
- };
-
- loc_t tmp_newloc = {
- /* Actual 'oldloc->path' */
- .path = local->loc1.path,
- .parent = local->loc1.parent
- };
-
- gf_log (this->name, GF_LOG_ERROR,
- "rename succussful on namespace, on "
- "stroage node failed, reverting back");
-
- STACK_WIND (frame,
- unify_ns_rename_undo_cbk,
- NS(this),
- NS(this)->fops->rename,
- &tmp_oldloc,
- &tmp_newloc);
- return 0;
- }
- } else {
- /* Rename successful on storage nodes */
-
- int32_t idx = 0;
- int16_t *tmp_list = NULL;
- uint64_t tmp_list_int64 = 0;
- if (local->loc2.inode) {
- inode_ctx_get (local->loc2.inode,
- this, &tmp_list_int64);
- list = (int16_t *)(long)tmp_list_int64;
-
- }
-
- if (list) {
- for (index = 0; list[index] != -1; index++);
- tmp_list = GF_CALLOC (1, index * 2,
- gf_unify_mt_int16_t);
- memcpy (tmp_list, list, index * 2);
-
- for (index = 0; list[index] != -1; index++) {
- /* TODO: Check this logic. */
- /* If the destination file exists in
- * the same storage node where we sent
- * 'rename' call, no need to send
- * unlink
- */
- for (idx = 0;
- local->list[idx] != -1; idx++) {
- if (tmp_list[index] == local->list[idx]) {
- tmp_list[index] = priv->child_count;
- continue;
- }
- }
-
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- if (callcnt > 1)
- gf_log (this->name,
- GF_LOG_ERROR,
- "%s->%s: more (%d) "
- "subvolumes have the "
- "newloc entry",
- local->loc1.path,
- local->loc2.path,
- callcnt);
-
- for (index=0;
- tmp_list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- STACK_WIND (frame,
- unify_rename_unlink_cbk,
- priv->xl_array[tmp_list[index]],
- priv->xl_array[tmp_list[index]]->fops->unlink,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
-
- GF_FREE (tmp_list);
- return 0;
- }
- if (tmp_list)
- GF_FREE (tmp_list);
- }
- }
-
- /* Need not send 'unlink' to storage node */
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent,
- &local->newpreparent, &local->newpostparent);
- }
-
- return 0;
-}
-
-int32_t
-unify_ns_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Free local->new_inode */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
- }
-
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preoldparent;
- local->oldpostparent = *postoldparent;
- local->newpreparent = *prenewparent;
- local->newpostparent = *postnewparent;
-
- /* Everything is fine. */
- if (IA_ISDIR (buf->ia_type)) {
- local->call_count = priv->child_count;
- for (index=0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rename,
- &local->loc1,
- &local->loc2);
- }
-
- return 0;
- }
-
- local->call_count = 0;
- /* send rename */
- list = local->list;
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->rename,
- &local->loc1,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
- } else {
- /* file doesn't seem to be present in storage nodes */
- gf_log (this->name, GF_LOG_CRITICAL,
- "CRITICAL: source file not in storage node, "
- "rename successful on namespace :O");
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- }
- return 0;
-}
-
-
-/**
- * unify_rename - One of the tricky function. The deadliest of all :O
- */
-int32_t
-unify_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- if ((local->loc1.path == NULL) ||
- (local->loc2.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- return 0;
- }
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_rename_cbk,
- NS(this),
- NS(this)->fops->rename,
- oldloc,
- newloc);
- return 0;
-}
-
-/**
- * unify_link_cbk -
- */
-int32_t
-unify_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret >= 0)
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_link_cbk -
- */
-int32_t
-unify_ns_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- int16_t *list = local->list;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send link request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Update inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Send link request to the node now */
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- if (priv->xl_array[list[index]] != NS (this)) {
- STACK_WIND (frame,
- unify_link_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->link,
- &local->loc1,
- &local->loc2);
- break;
- }
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-/**
- * unify_link -
- */
-int32_t
-unify_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_link_cbk,
- NS(this),
- NS(this)->fops->link,
- oldloc,
- newloc);
-
- return 0;
-}
-
-
-/**
- * unify_checksum_cbk -
- */
-int32_t
-unify_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
-
- return 0;
-}
-
-/**
- * unify_checksum -
- */
-int32_t
-unify_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- STACK_WIND (frame,
- unify_checksum_cbk,
- NS(this),
- NS(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-/**
- * unify_finodelk_cbk -
- */
-int
-unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_finodelk
- */
-int
-unify_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int cmd, struct gf_flock *flock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_finodelk_cbk,
- child, child->fops->finodelk,
- volume, fd, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_fentrylk_cbk -
- */
-int
-unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fentrylk
- */
-int
-unify_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fentrylk_cbk,
- child, child->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_fxattrop_cbk -
- */
-int
-unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_fxattrop
- */
-int
-unify_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fxattrop_cbk,
- child, child->fops->fxattrop,
- fd, optype, xattr);
-
- return 0;
-}
-
-
-/**
- * unify_inodelk_cbk -
- */
-int
-unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_inodelk
- */
-int
-unify_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int cmd, struct gf_flock *flock)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_inodelk_cbk,
- child, child->fops->inodelk,
- volume, loc, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_entrylk_cbk -
- */
-int
-unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_entrylk
- */
-int
-unify_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_entrylk_cbk,
- child, child->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_xattrop_cbk -
- */
-int
-unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_xattrop
- */
-int
-unify_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_xattrop_cbk,
- child, child->fops->xattrop,
- loc, optype, xattr);
-
- return 0;
-}
-
-int
-unify_forget (xlator_t *this,
- inode_t *inode)
-{
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- if (inode->ia_type && (!IA_ISDIR(inode->ia_type))) {
- inode_ctx_get (inode, this, &tmp_list);
- if (tmp_list) {
- list = (int16_t *)(long)tmp_list;
- GF_FREE (list);
- }
- }
-
- return 0;
-}
-
-/**
- * notify
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- unify_private_t *priv = this->private;
- struct sched_ops *sched = NULL;
-
- if (!priv) {
- return 0;
- }
-
- sched = priv->sched_ops;
- if (!sched) {
- gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O");
- raise (SIGTERM);
- return 0;
- }
- if (priv->namespace == data) {
- if (event == GF_EVENT_CHILD_UP) {
- sched->notify (this, event, data);
- }
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- /* Call scheduler's update () to enable it for scheduling */
- sched->notify (this, event, data);
-
- LOCK (&priv->lock);
- {
- /* Increment the inode's generation, which is
- used for self_heal */
- ++priv->inode_generation;
- ++priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (!priv->is_up) {
- default_notify (this, event, data);
- priv->is_up = 1;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- /* Call scheduler's update () to disable the child node
- * for scheduling
- */
- sched->notify (this, event, data);
- LOCK (&priv->lock);
- {
- --priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (priv->num_child_up == 0) {
- /* Send CHILD_DOWN to upper layer */
- default_notify (this, event, data);
- priv->is_up = 0;
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- break;
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_unify_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-/**
- * init - This function is called first in the xlator, while initializing.
- * All the config file options are checked and appropriate flags are set.
- *
- * @this -
- */
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = 0;
- int32_t count = 0;
- data_t *scheduler = NULL;
- data_t *data = NULL;
- xlator_t *ns_xl = NULL;
- xlator_list_t *trav = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t *parent = NULL;
- unify_private_t *_private = NULL;
-
-
- /* Check for number of child nodes, if there is no child nodes, exit */
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "No child nodes specified. check \"subvolumes \" "
- "option in volfile");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- /* Check for 'scheduler' in volume */
- scheduler = dict_get (this->options, "scheduler");
- if (!scheduler) {
- gf_log (this->name, GF_LOG_ERROR,
- "\"option scheduler <x>\" is missing in volfile");
- return -1;
- }
-
- /* Setting "option namespace <node>" */
- data = dict_get (this->options, "namespace");
- if(!data) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace option not specified, Exiting");
- return -1;
- }
- /* Search namespace in the child node, if found, exit */
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, data->data) == 0)
- break;
- trav = trav->next;
- }
- if (trav) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node used as a subvolume, Exiting");
- return -1;
- }
-
- /* Search for the namespace node, if found, continue */
- ns_xl = this->next;
- while (ns_xl) {
- if (strcmp (ns_xl->name, data->data) == 0)
- break;
- ns_xl = ns_xl->next;
- }
- if (!ns_xl) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node not found in volfile, Exiting");
- return -1;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "namespace node specified as %s", data->data);
-
- _private = GF_CALLOC (1, sizeof (*_private),
- gf_unify_mt_unify_private_t);
- ERR_ABORT (_private);
- _private->sched_ops = get_scheduler (this, scheduler->data);
- if (!_private->sched_ops) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Error while loading scheduler. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- if (ns_xl->parents) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Namespace node should not be a child of any other node. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- _private->namespace = ns_xl;
-
- /* update _private structure */
- {
- count = 0;
- trav = this->children;
- /* Get the number of child count */
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Child node count is %d", count);
-
- _private->child_count = count;
- if (count == 1) {
- /* TODO: Should I error out here? */
- gf_log (this->name, GF_LOG_CRITICAL,
- "WARNING: You have defined only one "
- "\"subvolumes\" for unify volume. It may not "
- "be the desired config, review your volume "
- "volfile. If this is how you are testing it,"
- " you may hit some performance penalty");
- }
-
- _private->xl_array = GF_CALLOC (1,
- sizeof (xlator_t) * (count + 1),
- gf_unify_mt_xlator_t);
- ERR_ABORT (_private->xl_array);
-
- count = 0;
- trav = this->children;
- while (trav) {
- _private->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
- _private->xl_array[count] = _private->namespace;
-
- /* self-heal part, start with generation '1' */
- _private->inode_generation = 1;
- /* Because, Foreground part is tested well */
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
- data = dict_get (this->options, "self-heal");
- if (data) {
- if (strcasecmp (data->data, "off") == 0)
- _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF;
-
- if (strcasecmp (data->data, "foreground") == 0)
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
-
- if (strcasecmp (data->data, "background") == 0)
- _private->self_heal = ZR_UNIFY_BG_SELF_HEAL;
- }
-
- /* optimist - ask bulde for more about it */
- data = dict_get (this->options, "optimist");
- if (data) {
- if (gf_string2boolean (data->data,
- &_private->optimist) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "optimist excepts only boolean "
- "options");
- }
- }
-
- LOCK_INIT (&_private->lock);
- }
-
- /* Now that everything is fine. */
- this->private = (void *)_private;
- {
- ret = _private->sched_ops->mem_acct_init (this);
-
- if (ret == -1) {
- return -1;
- }
-
- /* Initialize scheduler, if everything else is successful */
- ret = _private->sched_ops->init (this);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Initializing scheduler failed, Exiting");
- GF_FREE (_private);
- return -1;
- }
-
-
- ret = 0;
-
- /* This section is required because some fops may look
- * for 'xl->parent' variable
- */
- xlparent = GF_CALLOC (1, sizeof (*xlparent),
- gf_unify_mt_xlator_list_t);
- xlparent->xlator = this;
- if (!ns_xl->parents) {
- ns_xl->parents = xlparent;
- } else {
- parent = ns_xl->parents;
- while (parent->next)
- parent = parent->next;
- parent->next = xlparent;
- }
- }
-
- /* Tell namespace node that init is done */
- xlator_notify (ns_xl, GF_EVENT_PARENT_UP, this);
-
- return 0;
-}
-
-/**
- * fini - Free all the allocated memory
- */
-void
-fini (xlator_t *this)
-{
- unify_private_t *priv = this->private;
- priv->sched_ops->fini (this);
- this->private = NULL;
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv->xl_array);
- GF_FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .stat = unify_stat,
- .readlink = unify_readlink,
- .mknod = unify_mknod,
- .mkdir = unify_mkdir,
- .unlink = unify_unlink,
- .rmdir = unify_rmdir,
- .symlink = unify_symlink,
- .rename = unify_rename,
- .link = unify_link,
- .truncate = unify_truncate,
- .create = unify_create,
- .open = unify_open,
- .readv = unify_readv,
- .writev = unify_writev,
- .statfs = unify_statfs,
- .flush = unify_flush,
- .fsync = unify_fsync,
- .setxattr = unify_setxattr,
- .getxattr = unify_getxattr,
- .removexattr = unify_removexattr,
- .opendir = unify_opendir,
- .readdir = unify_readdir,
- .readdirp = unify_readdirp,
- .fsyncdir = unify_fsyncdir,
- .access = unify_access,
- .ftruncate = unify_ftruncate,
- .fstat = unify_fstat,
- .lk = unify_lk,
- .lookup = unify_lookup,
- .getdents = unify_getdents,
- .checksum = unify_checksum,
- .inodelk = unify_inodelk,
- .finodelk = unify_finodelk,
- .entrylk = unify_entrylk,
- .fentrylk = unify_fentrylk,
- .xattrop = unify_xattrop,
- .fxattrop = unify_fxattrop,
- .setattr = unify_setattr,
- .fsetattr = unify_fsetattr,
-};
-
-
-struct xlator_cbks cbks = {
- .forget = unify_forget,
-};
-
-struct volume_options options[] = {
- { .key = { "namespace" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler" },
- .value = { "alu", "rr", "random", "nufa", "switch" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"self-heal"},
- .value = { "foreground", "background", "off" },
- .type = GF_OPTION_TYPE_STR
- },
- /* TODO: remove it some time later */
- { .key = {"optimist"},
- .type = GF_OPTION_TYPE_BOOL
- },
-
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h
deleted file mode 100644
index dbd5e44a2..000000000
--- a/xlators/cluster/unify/src/unify.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _UNIFY_H
-#define _UNIFY_H
-
-#include "scheduler.h"
-#include "list.h"
-#include "unify-mem-types.h"
-
-#define MAX_DIR_ENTRY_STRING (32 * 1024)
-
-#define ZR_UNIFY_SELF_HEAL_OFF 0
-#define ZR_UNIFY_FG_SELF_HEAL 1
-#define ZR_UNIFY_BG_SELF_HEAL 2
-
-/* Sometimes one should use completely random numbers.. its good :p */
-#define UNIFY_SELF_HEAL_GETDENTS_COUNT 512
-
-#define NS(xl) (((unify_private_t *)xl->private)->namespace)
-
-/* This is used to allocate memory for local structure */
-#define INIT_LOCAL(fr, loc) \
-do { \
- loc = GF_CALLOC (1, sizeof (unify_local_t), gf_unify_mt_unify_local_t); \
- ERR_ABORT (loc); \
- if (!loc) { \
- STACK_UNWIND (fr, -1, ENOMEM); \
- return 0; \
- } \
- fr->local = loc; \
- loc->op_ret = -1; \
- loc->op_errno = ENOENT; \
-} while (0)
-
-
-
-struct unify_private {
- /* Update this structure depending on requirement */
- void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE,
- if xlator is using scheduler */
- struct sched_ops *sched_ops; /* Scheduler options */
- xlator_t *namespace; /* ptr to namespace xlator */
- xlator_t **xl_array;
- gf_boolean_t optimist;
- int16_t child_count;
- int16_t num_child_up;
- uint8_t self_heal;
- uint8_t is_up;
- uint64_t inode_generation;
- gf_lock_t lock;
-};
-typedef struct unify_private unify_private_t;
-
-struct unify_self_heal_struct {
- uint8_t dir_checksum[NAME_MAX];
- uint8_t ns_dir_checksum[NAME_MAX];
- uint8_t file_checksum[NAME_MAX];
- uint8_t ns_file_checksum[NAME_MAX];
- off_t *offset_list;
- int *count_list;
- dir_entry_t **entry_list;
-};
-
-
-struct _unify_local_t {
- int32_t call_count;
- int32_t op_ret;
- int32_t op_errno;
- mode_t mode;
- off_t offset;
- dev_t dev;
- uid_t uid;
- gid_t gid;
- int32_t flags;
- int32_t entry_count;
- int32_t count; // dir_entry_t count;
- fd_t *fd;
- struct iatt stbuf;
- struct iatt stpre;
- struct iatt stpost;
- struct statvfs statvfs_buf;
- struct timespec tv[2];
- char *name;
- int32_t revalidate;
-
- ino_t ia_ino;
- nlink_t ia_nlink;
-
- dict_t *dict;
-
- int16_t *list;
- int16_t *new_list; /* Used only in case of rename */
- int16_t index;
-
- int32_t failed;
- int32_t return_eio; /* Used in case of different st-mode
- present for a given path */
-
- uint64_t inode_generation; /* used to store the per directory
- * inode_generation. Got from inode's ctx
- * of directory inodes
- */
-
- struct unify_self_heal_struct *sh_struct;
- loc_t loc1, loc2;
-
- struct iatt poststbuf;
- /* When not used for rename, old*
- * are used as the attrs for the current
- * parent directory.
- */
- struct iatt oldpreparent;
- struct iatt oldpostparent;
- struct iatt newpreparent;
- struct iatt newpostparent;
- int32_t wbflags;
-};
-typedef struct _unify_local_t unify_local_t;
-
-int32_t zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local);
-
-#endif /* _UNIFY_H */
diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am
index df9080358..5075c59a8 100644
--- a/xlators/debug/error-gen/src/Makefile.am
+++ b/xlators/debug/error-gen/src/Makefile.am
@@ -2,15 +2,16 @@
xlator_LTLIBRARIES = error-gen.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-error_gen_la_LDFLAGS = -module -avoidversion
+error_gen_la_LDFLAGS = -module -avoid-version
error_gen_la_SOURCES = error-gen.c
error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = error-gen.h error-gen-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
index b643dc5f7..f02280535 100644
--- a/xlators/debug/error-gen/src/error-gen-mem-types.h
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ERROR_GEN_MEM_TYPES_H__
#define __ERROR_GEN_MEM_TYPES_H__
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index 6d6c5f24a..ec0874b35 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -24,6 +14,7 @@
#include "xlator.h"
#include "error-gen.h"
+#include "statedump.h"
sys_error_t error_no_list[] = {
[GF_FOP_LOOKUP] = { .error_no_count = 4,
@@ -91,9 +82,10 @@ sys_error_t error_no_list[] = {
[GF_FOP_READ] = { .error_no_count = 5,
.error_no = {EINVAL,EBADF,EFAULT,EISDIR,
ENAMETOOLONG}},
- [GF_FOP_WRITE] = { .error_no_count = 5,
+ [GF_FOP_WRITE] = { .error_no_count = 7,
.error_no = {EINVAL,EBADF,EFAULT,EISDIR,
- ENAMETOOLONG}},
+ ENAMETOOLONG,ENOSPC,
+ GF_ERROR_SHORT_WRITE}},
[GF_FOP_STATFS] = {.error_no_count = 10,
.error_no = {EACCES,EBADF,EFAULT,EINTR,
EIO,ENAMETOOLONG,ENOENT,
@@ -113,6 +105,15 @@ sys_error_t error_no_list[] = {
[GF_FOP_REMOVEXATTR] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,ENAMETOOLONG,
EINTR}},
+ [GF_FOP_FSETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_FGETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_FREMOVEXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
[GF_FOP_OPENDIR] = { .error_no_count = 8,
.error_no = {EACCES,EEXIST,EFAULT,
EISDIR,EMFILE,
@@ -237,6 +238,8 @@ conv_errno_to_int (char **error_no)
return EINTR;
else if (!strcmp ((*error_no), "EFBIG"))
return EFBIG;
+ else if (!strcmp((*error_no), "GF_ERROR_SHORT_WRITE"))
+ return GF_ERROR_SHORT_WRITE;
else
return EAGAIN;
}
@@ -286,6 +289,12 @@ get_fop_int (char **op_no_str)
return GF_FOP_GETXATTR;
else if (!strcmp ((*op_no_str), "removexattr"))
return GF_FOP_REMOVEXATTR;
+ else if (!strcmp ((*op_no_str), "fsetxattr"))
+ return GF_FOP_FSETXATTR;
+ else if (!strcmp ((*op_no_str), "fgetxattr"))
+ return GF_FOP_FGETXATTR;
+ else if (!strcmp ((*op_no_str), "fremovexattr"))
+ return GF_FOP_FREMOVEXATTR;
else if (!strcmp ((*op_no_str), "opendir"))
return GF_FOP_OPENDIR;
else if (!strcmp ((*op_no_str), "readdir"))
@@ -362,7 +371,8 @@ error_gen (xlator_t *this, int op_no)
rand_no = 0;
ret = error_no_list[op_no].error_no[rand_no];
}
- egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);
+ if (egp->random_failure == _gf_true)
+ egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);
}
return ret;
}
@@ -371,17 +381,17 @@ error_gen (xlator_t *this, int op_no)
int
error_gen_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode,
- buf, dict, postparent);
- return 0;
+ buf, xdata, postparent);
+ return 0;
}
int
error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -396,36 +406,28 @@ error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ NULL);
+ return 0;
}
STACK_WIND (frame, error_gen_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
- return 0;
-}
-
-
-int
-error_gen_forget (xlator_t *this, inode_t *inode)
-{
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
-
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -439,32 +441,31 @@ error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
-
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
int
error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -478,21 +479,21 @@ error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
- return 0;
+ loc, stbuf, valid, xdata);
+ return 0;
}
int
error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -506,32 +507,32 @@ error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
- return 0;
+ fd, stbuf, valid, xdata);
+ return 0;
}
int
error_gen_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -546,32 +547,32 @@ error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (truncate, frame, -1, op_errno,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
- return 0;
+ loc, offset, xdata);
+ return 0;
}
int
error_gen_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp =NULL;
@@ -586,31 +587,30 @@ error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
+ fd, offset, xdata);
+ return 0;
}
int
error_gen_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask)
+ int32_t mask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -624,31 +624,31 @@ error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (access, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
- return 0;
+ loc, mask, xdata);
+ return 0;
}
int
error_gen_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *path, struct iatt *sbuf)
+ const char *path, struct iatt *sbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf, xdata);
+ return 0;
}
int
error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
+ size_t size, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -662,15 +662,15 @@ error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
- return 0;
+ loc, size, xdata);
+ return 0;
}
@@ -678,18 +678,18 @@ int
error_gen_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, dict_t *params)
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -704,15 +704,15 @@ error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, params);
- return 0;
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
@@ -720,17 +720,17 @@ int
error_gen_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -745,31 +745,32 @@ error_gen_mkdir (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
- return 0;
+ loc, mode, umask, xdata);
+ return 0;
}
int
error_gen_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
-error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -783,31 +784,34 @@ error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL,
+ xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ loc, xflag, xdata);
+ return 0;
}
int
error_gen_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
-error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -821,15 +825,15 @@ error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
- return 0;
+ loc, flags, xdata);
+ return 0;
}
@@ -837,17 +841,17 @@ int
error_gen_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -862,15 +866,15 @@ error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL,
- NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL); /* pre & post parent attr */
return 0;
}
STACK_WIND (frame, error_gen_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
- return 0;
+ linkpath, loc, umask, xdata);
+ return 0;
}
@@ -878,18 +882,19 @@ int
error_gen_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
+ prenewparent, postnewparent, xdata);
+ return 0;
}
int
error_gen_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -904,15 +909,15 @@ error_gen_rename (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
- NULL, NULL, NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
@@ -920,17 +925,17 @@ int
error_gen_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -945,15 +950,15 @@ error_gen_link (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
@@ -961,17 +966,18 @@ int
error_gen_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -986,30 +992,30 @@ error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL); /* pre & post attr */
+ NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
- return 0;
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
}
int
error_gen_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int
error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1023,15 +1029,15 @@ error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
- return 0;
+ loc, flags, fd, xdata);
+ return 0;
}
@@ -1039,17 +1045,17 @@ int
error_gen_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref);
- return 0;
+ vector, count, stbuf, iobref, xdata);
+ return 0;
}
int
error_gen_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1064,33 +1070,33 @@ error_gen_readv (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- return 0;
+ fd, size, offset, flags, xdata);
+ return 0;
}
int
error_gen_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t off, struct iobref *iobref)
+ off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1102,31 +1108,47 @@ error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (enable)
op_errno = error_gen (this, GF_FOP_WRITE);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ if (op_errno == GF_ERROR_SHORT_WRITE) {
+ struct iovec *shortvec;
+
+ /*
+ * A short write error returns some value less than what was
+ * requested from a write. To simulate this, replace the vector
+ * with one half the size;
+ */
+ shortvec = iov_dup(vector, 1);
+ shortvec->iov_len /= 2;
+
+ STACK_WIND(frame, error_gen_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, shortvec, count,
+ off, flags, iobref, xdata);
+ GF_FREE(shortvec);
return 0;
+ } else if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, off, iobref);
- return 0;
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
int
error_gen_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1140,15 +1162,15 @@ error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
@@ -1156,15 +1178,15 @@ int
error_gen_fsync_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
int
-error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1178,29 +1200,29 @@ error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
int
error_gen_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1214,29 +1236,29 @@ error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
int
error_gen_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int
-error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1250,29 +1272,29 @@ error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ loc, fd, xdata);
+ return 0;
}
int
error_gen_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1286,30 +1308,29 @@ error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
int
error_gen_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
-
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1323,31 +1344,30 @@ error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1361,30 +1381,30 @@ error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
- return 0;
+ loc, dict, flags, xdata);
+ return 0;
}
int
error_gen_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1398,31 +1418,103 @@ error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
- return 0;
+ loc, name, xdata);
+ return 0;
+}
+
+int
+error_gen_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+error_gen_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- return 0;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, op_errno, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FGETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FGETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1436,31 +1528,30 @@ error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
- return 0;
+ loc, flags, dict, xdata);
+ return 0;
}
int
error_gen_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
-
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1474,31 +1565,30 @@ error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
- return 0;
+ fd, flags, dict, xdata);
+ return 0;
}
int
error_gen_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1512,30 +1602,66 @@ error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
- return 0;
+ loc, name, xdata);
+ return 0;
+}
+
+int
+error_gen_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FREMOVEXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FREMOVEXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, op_errno, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
}
int
error_gen_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
int
error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1549,32 +1675,31 @@ error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
- return 0;
+ fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1588,32 +1713,31 @@ error_gen_inodelk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_finodelk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1627,32 +1751,31 @@ error_gen_finodelk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_finodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1666,32 +1789,31 @@ error_gen_entrylk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_entrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
int
-error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1705,15 +1827,15 @@ error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fentrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
@@ -1725,8 +1847,7 @@ error_gen_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, char *spec_data)
{
STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
-
- return 0;
+ return 0;
}
@@ -1760,16 +1881,17 @@ error_gen_getspec (call_frame_t *frame, xlator_t *this, const char *key,
int
error_gen_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off)
+ size_t size, off_t off, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1783,30 +1905,31 @@ error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL, xdata);
return 0;
}
STACK_WIND (frame, error_gen_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
+ fd, size, off, xdata);
return 0;
}
int
error_gen_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
+ off_t off, dict_t *dict)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1820,29 +1943,94 @@ error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
+ fd, size, off, dict);
return 0;
}
-
-int
-error_gen_closedir (xlator_t *this, fd_t *fd)
+static void
+error_gen_set_failure (eg_t *pvt, int percent)
{
- return 0;
+ GF_ASSERT (pvt);
+
+ if (percent)
+ pvt->failure_iter_no = 100/percent;
+ else
+ pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
}
+static void
+error_gen_parse_fill_fops (eg_t *pvt, char *enable_fops)
+{
+ char *op_no_str = NULL;
+ int op_no = -1;
+ int i = 0;
+ xlator_t *this = THIS;
+ char *saveptr = NULL;
-int
-error_gen_close (xlator_t *this, fd_t *fd)
+ GF_ASSERT (pvt);
+ GF_ASSERT (this);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 0;
+
+ if (!enable_fops) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "All fops are enabled.");
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 1;
+ } else {
+ op_no_str = strtok_r (enable_fops, ",", &saveptr);
+ while (op_no_str) {
+ op_no = get_fop_int (&op_no_str);
+ if (op_no == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Wrong option value %s", op_no_str);
+ } else
+ pvt->enable[op_no] = 1;
+
+ op_no_str = strtok_r (NULL, ",", &saveptr);
+ }
+ }
+}
+
+int32_t
+error_gen_priv_dump (xlator_t *this)
{
- return 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ eg_t *conf = NULL;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.debug.error-gen.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.debug.error-gen","%s.priv",
+ this->name);
+
+ gf_proc_dump_write("op_count", "%d", conf->op_count);
+ gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
+ gf_proc_dump_write("error_no", "%s", conf->error_no);
+ gf_proc_dump_write("random_failure", "%d", conf->random_failure);
+
+ UNLOCK(&conf->lock);
+out:
+ return ret;
}
int32_t
@@ -1865,17 +2053,43 @@ mem_acct_init (xlator_t *this)
}
+/*
+ * Re-read the "error-no", "failure", "enable" and "random-failure" options
+ * and apply them to the existing private state.
+ *
+ * Returns 0 once every option has been applied, -1 when the xlator or its
+ * private state is missing or when an option cannot be reconfigured.
+ */
int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ eg_t *pvt = NULL;
+ /* Start pessimistic: GF_OPTION_RECONF jumps to "out" on failure
+ * without assigning ret, so only the complete path may report 0. */
+ int32_t ret = -1;
+ char *error_enable_fops = NULL;
+ int32_t failure_percent_int = 0;
+
+ if (!this || !this->private)
+ goto out;
+
+ pvt = this->private;
+
+ GF_OPTION_RECONF ("error-no", pvt->error_no, options, str, out);
+
+ GF_OPTION_RECONF ("failure", failure_percent_int, options, int32,
+ out);
+
+ GF_OPTION_RECONF ("enable", error_enable_fops, options, str, out);
+
+ GF_OPTION_RECONF ("random-failure", pvt->random_failure, options,
+ bool, out);
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
+
+ ret = 0;
+out:
+ /* "this" is NULL when the first guard fired; do not dereference it */
+ gf_log (this ? this->name : "error-gen", GF_LOG_DEBUG,
+ "reconfigure returning %d", ret);
+ return ret;
+}
+
+int
init (xlator_t *this)
{
eg_t *pvt = NULL;
- data_t *error_no = NULL;
- data_t *failure_percent = NULL;
- data_t *enable = NULL;
int32_t ret = 0;
char *error_enable_fops = NULL;
- char *op_no_str = NULL;
- int op_no = -1;
- int i = 0;
int32_t failure_percent_int = 0;
if (!this->children || this->children->next) {
@@ -1890,74 +2104,34 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- error_no = dict_get (this->options, "error-no");
- failure_percent = dict_get (this->options, "failure");
- enable = dict_get (this->options, "enable");
-
pvt = GF_CALLOC (1, sizeof (eg_t), gf_error_gen_mt_eg_t);
if (!pvt) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory.");
ret = -1;
goto out;
}
LOCK_INIT (&pvt->lock);
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- pvt->enable[i] = 0;
- if (!error_no) {
- gf_log (this->name, GF_LOG_DEBUG,
- "error-no not specified.");
- } else {
- pvt->error_no = data_to_str (error_no);
- }
+ /* GF_OPTION_INIT jumps to "out" on failure without assigning ret.
+ * Mark the result as failed while the options are parsed so that a
+ * bad option frees pvt below and makes init report the error. */
+ ret = -1;
+
+ GF_OPTION_INIT ("error-no", pvt->error_no, str, out);
- if (!failure_percent) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failure percent not specified.");
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- } else {
- failure_percent_int = data_to_int32 (failure_percent);
- if (failure_percent_int)
- pvt->failure_iter_no = 100/failure_percent_int;
- else
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- }
+ GF_OPTION_INIT ("failure", failure_percent_int, int32, out);
+
+ GF_OPTION_INIT ("enable", error_enable_fops, str, out);
+
+ GF_OPTION_INIT ("random-failure", pvt->random_failure, bool, out);
+
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
- if (!enable) {
- gf_log (this->name, GF_LOG_WARNING,
- "All fops are enabled.");
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- pvt->enable[i] = 1;
- } else {
- error_enable_fops = data_to_str (enable);
- op_no_str = error_enable_fops;
- while ((*error_enable_fops) != '\0') {
- error_enable_fops++;
- if (((*error_enable_fops) == ',') ||
- ((*error_enable_fops) == '\0')) {
- if ((*error_enable_fops) != '\0') {
- (*error_enable_fops) = '\0';
- error_enable_fops++;
- }
- op_no = get_fop_int (&op_no_str);
- if (op_no == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Wrong option value %s",
- op_no_str);
- } else
- pvt->enable[op_no] = 1;
- op_no_str = error_enable_fops;
- }
- }
- }
+ /* every option was accepted: publish the private state */
+ ret = 0;
this->private = pvt;
/* Give some seed value here */
srand (time(NULL));
out:
+ /* on failure pvt was never published, so release it here */
+ if (ret)
+ GF_FREE (pvt);
return ret;
}
@@ -1979,6 +2153,12 @@ fini (xlator_t *this)
return;
}
+/* State-dump hooks: only the private-data dump (.priv) is provided. */
+struct xlator_dumpops dumpops = {
+ .priv = error_gen_priv_dump,
+};
+
+/* The release/releasedir handlers are gone, so the table is left empty; it
+ * keeps the callback-table type (struct xlator_cbks) it was defined with
+ * before, not the fop-table type. */
+struct xlator_cbks cbks;
+
struct xlator_fops fops = {
.lookup = error_gen_lookup,
.stat = error_gen_stat,
@@ -2001,6 +2181,9 @@ struct xlator_fops fops = {
.setxattr = error_gen_setxattr,
.getxattr = error_gen_getxattr,
.removexattr = error_gen_removexattr,
+ .fsetxattr = error_gen_fsetxattr,
+ .fgetxattr = error_gen_fgetxattr,
+ .fremovexattr = error_gen_fremovexattr,
.opendir = error_gen_opendir,
.readdir = error_gen_readdir,
.readdirp = error_gen_readdirp,
@@ -2021,22 +2204,29 @@ struct xlator_fops fops = {
.getspec = error_gen_getspec,
};
-struct xlator_cbks cbks = {
- .release = error_gen_close,
- .releasedir = error_gen_closedir,
-};
-
+/*
+ * Options understood by error-gen: "failure" (percentage), "error-no"
+ * (one of the listed error names), "random-failure" (boolean, default
+ * "off") and "enable" (string, parsed as a comma separated fop list).
+ */
struct volume_options options[] = {
{ .key = {"failure"},
- .type = GF_OPTION_TYPE_INT },
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Percentage failure of operations when enabled.",
+ },
+
{ .key = {"error-no"},
.value = {"ENOENT","ENOTDIR","ENAMETOOLONG","EACCES","EBADF",
"EFAULT","ENOMEM","EINVAL","EIO","EEXIST","ENOSPC",
- "EPERM","EROFS","EBUSY","EISDIR","ENOTEMPTY","EMLINK"
+ "EPERM","EROFS","EBUSY","EISDIR","ENOTEMPTY","EMLINK",
"ENODEV","EXDEV","EMFILE","ENFILE","ENOSYS","EINTR",
- "EFBIG","EAGAIN"},
- .type = GF_OPTION_TYPE_STR },
+ "EFBIG","EAGAIN","GF_ERROR_SHORT_WRITE"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"random-failure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+
{ .key = {"enable"},
- .type = GF_OPTION_TYPE_STR },
+ .type = GF_OPTION_TYPE_STR,
+ },
+
{ .key = {NULL} }
};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
index bd92aad6a..d92c23062 100644
--- a/xlators/debug/error-gen/src/error-gen.h
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _ERROR_GEN_H
#define _ERROR_GEN_H
@@ -29,11 +19,24 @@
#define GF_FAILURE_DEFAULT 10
+/*
+ * Pseudo-errors refer to errors beyond the scope of traditional <-1, op_errno>
+ * returns. This facilitates the ability to return unexpected, but not -1 values
+ * and/or to inject operations that lead to implicit error conditions. The range
+ * for pseudo errors resides at a high value to avoid conflicts with the errno
+ * range.
+ */
+enum GF_PSEUDO_ERRORS {
+ GF_ERROR_SHORT_WRITE = 1000, /* short writev return value */
+ GF_ERROR_MAX /* one past the last pseudo error */
+};
+
+/*
+ * Private state of one error-gen instance.
+ * enable[]        - one flag per fop, filled with 0/1 by
+ *                   error_gen_parse_fill_fops()
+ * op_count        - counter reported by the private state dump
+ *                   (presumably the number of fops seen - confirm)
+ * failure_iter_no - 100 divided by the configured failure percentage
+ * error_no        - value of the "error-no" option
+ * random_failure  - value of the "random-failure" option
+ * lock            - taken by the state dump while the fields are read
+ */
typedef struct {
int enable[GF_FOP_MAXVALUE];
int op_count;
int failure_iter_no;
char *error_no;
+ gf_boolean_t random_failure;
gf_lock_t lock;
} eg_t;
diff --git a/xlators/debug/io-stats/src/Makefile.am b/xlators/debug/io-stats/src/Makefile.am
index b894e79c3..332d79015 100644
--- a/xlators/debug/io-stats/src/Makefile.am
+++ b/xlators/debug/io-stats/src/Makefile.am
@@ -2,14 +2,15 @@
xlator_LTLIBRARIES = io-stats.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-io_stats_la_LDFLAGS = -module -avoidversion
+io_stats_la_LDFLAGS = -module -avoid-version
io_stats_la_SOURCES = io-stats.c
io_stats_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = io-stats-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
index 2063f6d6a..c30dfb17e 100644
--- a/xlators/debug/io-stats/src/io-stats-mem-types.h
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __IO_STATS_MEM_TYPES_H__
#define __IO_STATS_MEM_TYPES_H__
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 6c5563cad..7fb697ae4 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -58,7 +48,7 @@ typedef enum {
IOS_STATS_TYPE_READDIRP,
IOS_STATS_TYPE_READ_THROUGHPUT,
IOS_STATS_TYPE_WRITE_THROUGHPUT,
- IOS_STATS_TYPE_MAX,
+ IOS_STATS_TYPE_MAX
}ios_stats_type_t;
typedef enum {
@@ -121,7 +111,7 @@ struct ios_conf {
struct ios_global_stats incremental;
gf_boolean_t dump_fd_stats;
gf_boolean_t count_fop_hits;
- int measure_latency;
+ gf_boolean_t measure_latency;
struct ios_stat_head list[IOS_STATS_TYPE_MAX];
struct ios_stat_head thru_list[IOS_STATS_THRU_MAX];
};
@@ -293,43 +283,43 @@ is_fop_latency_started (call_frame_t *frame)
} \
UNLOCK (&iosstat->lock); \
ios_stat_add_to_list (&conf->list[type], \
- value, iosstat); \
+ value, iosstat); \
\
} while (0)
+/*
+ * Record a new throughput high-water mark for iosstat.
+ * Expects frame, this and op_ret to be in scope at the expansion site.
+ * elapsed is the frame's begin..end interval in microseconds and the
+ * throughput is op_ret / elapsed. When it is not below the stored value,
+ * the counter and its timestamp are updated under iosstat->lock and the
+ * entry is then added to conf->thru_list[type] outside the lock.
+ * NOTE(review): elapsed can be 0 when begin and end are equal; confirm the
+ * resulting floating point division is acceptable.
+ */
#define BUMP_THROUGHPUT(iosstat, type) \
do { \
- struct ios_conf *conf = NULL; \
- double elapsed; \
- struct timeval *begin, *end; \
- double throughput; \
+ struct ios_conf *conf = NULL; \
+ double elapsed; \
+ struct timeval *begin, *end; \
+ double throughput; \
int flag = 0; \
- \
- begin = &frame->begin; \
- end = &frame->end; \
- \
- elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
- + (end->tv_usec - begin->tv_usec); \
- throughput = op_ret / elapsed; \
- \
- conf = this->private; \
- LOCK(&iosstat->lock); \
- { \
- if (iosstat->thru_counters[type].throughput \
+ \
+ begin = &frame->begin; \
+ end = &frame->end; \
+ \
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
+ + (end->tv_usec - begin->tv_usec); \
+ throughput = op_ret / elapsed; \
+ \
+ conf = this->private; \
+ LOCK(&iosstat->lock); \
+ { \
+ if (iosstat->thru_counters[type].throughput \
<= throughput) { \
- iosstat->thru_counters[type].throughput = \
+ iosstat->thru_counters[type].throughput = \
throughput; \
- gettimeofday (&iosstat-> \
+ gettimeofday (&iosstat-> \
thru_counters[type].time, NULL); \
flag = 1; \
} \
} \
- UNLOCK (&iosstat->lock); \
+ UNLOCK (&iosstat->lock); \
if (flag) \
ios_stat_add_to_list (&conf->thru_list[type], \
throughput, iosstat); \
- } while (0)
+ } while (0)
int
ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd)
@@ -484,12 +474,12 @@ ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
new = GF_CALLOC (1, sizeof (*new),
gf_io_stats_mt_ios_stat_list);
new->iosstat = iosstat;
- new->value = value;
+ new->value = value;
ios_stat_ref (iosstat);
- list_add_tail (&new->list, &tmp->list);
+ list_add_tail (&new->list, &tmp->list);
stat = last->iosstat;
last->iosstat = NULL;
- ios_stat_unref (stat);
+ ios_stat_unref (stat);
list_del (&last->list);
GF_FREE (last);
if (reposition == MAX_LIST_MEMBERS)
@@ -511,7 +501,7 @@ ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
list_head->members++;
if (list_head->min_cnt > value)
list_head->min_cnt = value;
- }
+ }
}
out:
UNLOCK (&list_head->lock);
@@ -555,7 +545,7 @@ ios_dump_file_stats (struct ios_stat_head *list_head, xlator_t *this, FILE* logf
LOCK (&list_head->lock);
{
list_for_each_entry (entry, &list_head->iosstats->list, list) {
- ios_log (this, logfp, "%.0f\t\t%s",
+ ios_log (this, logfp, "%-12.0f %s",
entry->value, entry->iosstat->filename);
}
}
@@ -568,24 +558,20 @@ ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this,
FILE* logfp, ios_stats_type_t type)
{
struct ios_stat_list *entry = NULL;
- struct timeval time = {0, };
- struct tm *tm = NULL;
+ struct timeval time = {0, };
char timestr[256] = {0, };
+ /* Log one line per list entry: timestamp, throughput value and file
+ * name, all read while list_head->lock is held. */
LOCK (&list_head->lock);
{
list_for_each_entry (entry, &list_head->iosstats->list, list) {
+ /* Keep loading the entry's own timestamp: its tv_usec is
+ * appended to timestr below. Without this assignment the
+ * microseconds are always printed as 0. */
time = entry->iosstat->thru_counters[type].time;
- tm = localtime (&time.tv_sec);
- if (!tm)
- continue;
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ gf_time_fmt (timestr, sizeof timestr,
+ entry->iosstat->thru_counters[type].time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS, time.tv_usec);
- ios_log (this, logfp, "%.2f\t\t%s \t\t- %s",
- entry->value,
- entry->iosstat->filename, timestr);
+ ios_log (this, logfp, "%s \t %-10.2f \t %s",
+ timestr, entry->value, entry->iosstat->filename);
}
}
UNLOCK (&list_head->lock);
@@ -596,56 +582,108 @@ int
io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
struct timeval *now, int interval, FILE* logfp)
{
- int i = 0;
+ int i = 0;
+ int per_line = 0;
+ int index = 0;
struct ios_stat_head *list_head = NULL;
struct ios_conf *conf = NULL;
- struct tm *tm = NULL;
char timestr[256] = {0, };
+ char str_header[128] = {0};
+ char str_read[128] = {0};
+ char str_write[128] = {0};
conf = this->private;
if (interval == -1)
- ios_log (this, logfp, "=== Cumulative stats ===");
+ ios_log (this, logfp, "\n=== Cumulative stats ===");
else
- ios_log (this, logfp, "=== Interval %d stats ===",
+ ios_log (this, logfp, "\n=== Interval %d stats ===",
interval);
- ios_log (this, logfp, " Duration : %"PRId64"secs",
+ ios_log (this, logfp, " Duration : %"PRId64" secs",
(uint64_t) (now->tv_sec - stats->started_at.tv_sec));
ios_log (this, logfp, " BytesRead : %"PRId64,
stats->data_read);
- ios_log (this, logfp, " BytesWritten : %"PRId64,
+ ios_log (this, logfp, " BytesWritten : %"PRId64"\n",
stats->data_written);
+ snprintf (str_header, sizeof (str_header), "%-12s %c", "Block Size", ':');
+ snprintf (str_read, sizeof (str_read), "%-12s %c", "Read Count", ':');
+ snprintf (str_write, sizeof (str_write), "%-12s %c", "Write Count", ':');
+ index = 14;
for (i = 0; i < 32; i++) {
+ if ((stats->block_count_read[i] == 0) &&
+ (stats->block_count_write[i] == 0))
+ continue;
+ per_line++;
+
+ snprintf (str_header+index, sizeof (str_header)-index,
+ "%16dB+", (1<<i));
if (stats->block_count_read[i])
- ios_log (this, logfp, " Read %06db+ : %"PRId64,
- (1 << i), stats->block_count_read[i]);
+ snprintf (str_read+index, sizeof (str_read)-index,
+ "%18"PRId64, stats->block_count_read[i]);
+ else snprintf (str_read+index, sizeof (str_read)-index,
+ "%18s", "0");
+ if (stats->block_count_write[i])
+ snprintf (str_write+index, sizeof (str_write)-index,
+ "%18"PRId64, stats->block_count_write[i]);
+ else snprintf (str_write+index, sizeof (str_write)-index,
+ "%18s", "0");
+
+ index += 18;
+ if (per_line == 3) {
+ ios_log (this, logfp, "%s", str_header);
+ ios_log (this, logfp, "%s", str_read);
+ ios_log (this, logfp, "%s\n", str_write);
+
+ memset (str_header, 0, sizeof (str_header));
+ memset (str_read, 0, sizeof (str_read));
+ memset (str_write, 0, sizeof (str_write));
+
+ snprintf (str_header, sizeof (str_header), "%-12s %c",
+ "Block Size", ':');
+ snprintf (str_read, sizeof (str_read), "%-12s %c",
+ "Read Count", ':');
+ snprintf (str_write, sizeof (str_write), "%-12s %c",
+ "Write Count", ':');
+
+ index = 14;
+ per_line = 0;
+ }
}
- for (i = 0; i < 32; i++) {
- if (stats->block_count_write[i])
- ios_log (this, logfp, "Write %06db+ : %"PRId64,
- (1 << i), stats->block_count_write[i]);
+ if (per_line != 0) {
+ ios_log (this, logfp, "%s", str_header);
+ ios_log (this, logfp, "%s", str_read);
+ ios_log (this, logfp, "%s\n", str_write);
}
+ ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "Fop",
+ "Call Count", "Avg-Latency", "Min-Latency",
+ "Max-Latency");
+ ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "---", "----------",
+ "-----------", "-----------", "-----------");
+
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
if (stats->fop_hits[i] && !stats->latency[i].avg)
- ios_log (this, logfp, "%14s : %"PRId64,
- gf_fop_list[i], stats->fop_hits[i]);
+ ios_log (this, logfp, "%-13s %10"PRId64" %11s "
+ "us %11s us %11s us", gf_fop_list[i],
+ stats->fop_hits[i], "0", "0", "0");
else if (stats->fop_hits[i] && stats->latency[i].avg)
- ios_log (this, logfp, "%14s : %"PRId64 ", latency"
- "(avg: %f, min: %f, max: %f)",
- gf_fop_list[i], stats->fop_hits[i],
- stats->latency[i].avg, stats->latency[i].min,
- stats->latency[i].max);
+ ios_log (this, logfp, "%-13s %10"PRId64" %11.2lf us "
+ "%11.2lf us %11.2lf us", gf_fop_list[i],
+ stats->fop_hits[i], stats->latency[i].avg,
+ stats->latency[i].min, stats->latency[i].max);
}
+ ios_log (this, logfp, "------ ----- ----- ----- ----- ----- ----- ----- "
+ " ----- ----- ----- -----\n");
if (interval == -1) {
LOCK (&conf->lock);
{
- tm = localtime (&conf->cumulative.max_openfd_time.tv_sec);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS,
conf->cumulative.max_openfd_time.tv_usec);
ios_log (this, logfp, "Current open fd's: %"PRId64
@@ -654,41 +692,43 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
conf->cumulative.max_nr_opens, timestr);
}
UNLOCK (&conf->lock);
- ios_log (this, logfp, "==========Open file stats========");
- ios_log (this, logfp, "open call count:\t\t\tfile name");
+ ios_log (this, logfp, "\n==========Open File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
list_head = &conf->list[IOS_STATS_TYPE_OPEN];
ios_dump_file_stats (list_head, this, logfp);
- ios_log (this, logfp, "==========Read file stats========");
- ios_log (this, logfp, "read call count:\t\t\tfilename");
+ ios_log (this, logfp, "\n==========Read File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
list_head = &conf->list[IOS_STATS_TYPE_READ];
ios_dump_file_stats (list_head, this, logfp);
- ios_log (this, logfp, "==========Write file stats========");
- ios_log (this, logfp, "write call count:\t\t\tfilename");
+ ios_log (this, logfp, "\n==========Write File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
list_head = &conf->list[IOS_STATS_TYPE_WRITE];
ios_dump_file_stats (list_head, this, logfp);
- ios_log (this, logfp, "==========Directory open stats========");
- ios_log (this, logfp, "Opendir count:\t\t\tdirectory name");
+ ios_log (this, logfp, "\n==========Directory open stats========");
+ ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
ios_dump_file_stats (list_head, this, logfp);
- ios_log (this, logfp, "==========Directory readdirp stats========");
- ios_log (this, logfp, "readdirp count:\t\t\tdirectory name");
+ ios_log (this, logfp, "\n========Directory readdirp Stats=======");
+ ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
ios_dump_file_stats (list_head, this, logfp);
- ios_log (this, logfp, "==========Read throughput file stats========");
- ios_log (this, logfp, "read throughput(MBps):\t\t\tfilename");
+ ios_log (this, logfp, "\n========Read Throughput File Stats=====");
+ ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
list_head = &conf->thru_list[IOS_STATS_THRU_READ];
- ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
+ ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
- ios_log (this, logfp, "==========Write throughput file stats========");
- ios_log (this, logfp, "write througput (MBps):\t\t\tfilename");
+ ios_log (this, logfp, "\n======Write Throughput File Stats======");
+ ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
- ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
+ ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
}
return 0;
}
@@ -1027,36 +1067,45 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
struct ios_stat_list *entry = NULL;
int ret = -1;
ios_stats_thru_t index = IOS_STATS_THRU_MAX;
- struct tm *tm = NULL;
char timestr[256] = {0, };
+ char *dict_timestr = NULL;
conf = this->private;
switch (flags) {
- case IOS_STATS_TYPE_OPEN:
+ case IOS_STATS_TYPE_OPEN:
+ /* Export the open-fd counters and the time of the open-fd
+ * maximum; every failure leaves through "unlock" with ret set. */
list_head = &conf->list[IOS_STATS_TYPE_OPEN];
LOCK (&conf->lock);
{
ret = dict_set_uint64 (resp, "current-open",
conf->cumulative.nr_opens);
if (ret)
- goto out;
+ goto unlock;
ret = dict_set_uint64 (resp, "max-open",
conf->cumulative.max_nr_opens);
- tm = localtime (&conf->cumulative.max_openfd_time.tv_sec);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
-
- ret = dict_set_str (resp, "max-openfd-time",
- timestr);
+ /* do not let the next assignment to ret hide this result */
+ if (ret)
+ goto unlock;
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ if (conf->cumulative.max_openfd_time.tv_sec)
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS,
+ conf->cumulative.max_openfd_time.tv_usec);
+
+ dict_timestr = gf_strdup (timestr);
+ if (!dict_timestr) {
+ /* ret is still 0 here; report the failed copy */
+ ret = -1;
+ goto unlock;
+ }
+ ret = dict_set_dynstr (resp, "max-openfd-time",
+ dict_timestr);
if (ret)
- goto out;
+ goto unlock;
}
+ unlock:
UNLOCK (&conf->lock);
-
+ /* Do not proceed if we came here because of some error
+ * during the dict operation */
+ if (ret)
+ goto out;
break;
case IOS_STATS_TYPE_READ:
list_head = &conf->list[IOS_STATS_TYPE_READ];
@@ -1072,7 +1121,7 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
break;
case IOS_STATS_TYPE_READ_THROUGHPUT:
list_head = &conf->thru_list[IOS_STATS_THRU_READ];
- index = IOS_STATS_THRU_READ;
+ index = IOS_STATS_THRU_READ;
break;
case IOS_STATS_TYPE_WRITE_THROUGHPUT:
list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
@@ -1093,39 +1142,44 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
snprintf (key, 256, "%s-%d", "filename", cnt);
ret = dict_set_str (resp, key, entry->iosstat->filename);
if (ret)
- goto out;
+ goto unlock_list_head;
snprintf (key, 256, "%s-%d", "value",cnt);
ret = dict_set_uint64 (resp, key, entry->value);
if (ret)
- goto out;
+ goto unlock_list_head;
if (index != IOS_STATS_THRU_MAX) {
snprintf (key, 256, "%s-%d", "time-sec", cnt);
- ret = dict_set_int32 (resp, key,
+ ret = dict_set_int32 (resp, key,
entry->iosstat->thru_counters[index].time.tv_sec);
if (ret)
- goto out;
+ goto unlock_list_head;
snprintf (key, 256, "%s-%d", "time-usec", cnt);
- ret = dict_set_int32 (resp, key,
+ ret = dict_set_int32 (resp, key,
entry->iosstat->thru_counters[index].time.tv_usec);
if (ret)
- goto out;
+ goto unlock_list_head;
}
if (cnt == list_cnt)
break;
}
}
+unlock_list_head:
UNLOCK (&list_head->lock);
-
+ /* ret is !=0 if some dict operation in the above critical region
+ * failed. */
+ if (ret)
+ goto out;
ret = dict_set_int32 (resp, "members", cnt);
out:
return ret;
}
+
int
io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
struct ios_fd *iosfd = NULL;
char *path = NULL;
@@ -1178,21 +1232,21 @@ io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unwind:
UPDATE_PROFILE_STATS (frame, CREATE);
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct ios_fd *iosfd = NULL;
char *path = NULL;
struct ios_stat *iosstat = NULL;
struct ios_conf *conf = NULL;
- conf = this->private;
+ conf = this->private;
path = frame->local;
frame->local = NULL;
@@ -1216,6 +1270,16 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ios_fd_ctx_set (fd, this, iosfd);
ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!iosstat) {
+ iosstat = GF_CALLOC (1, sizeof (*iosstat),
+ gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ iosstat->filename = gf_strdup (path);
+ uuid_copy (iosstat->gfid, fd->inode->gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
+ }
+ }
LOCK (&conf->lock);
{
@@ -1233,7 +1297,7 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unwind:
UPDATE_PROFILE_STATS (frame, OPEN);
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -1241,10 +1305,10 @@ unwind:
+/* stat callback: runs UPDATE_PROFILE_STATS for STAT, then unwinds the frame
+ * with op_ret, op_errno, buf and xdata passed through unchanged.
+ * Always returns 0. */
int
io_stats_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, STAT);
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
@@ -1253,7 +1317,7 @@ int
io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *buf, struct iobref *iobref)
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
{
int len = 0;
fd_t *fd = NULL;
@@ -1277,7 +1341,7 @@ io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
+ vector, count, buf, iobref, xdata);
return 0;
}
@@ -1286,7 +1350,7 @@ io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
inode_t *inode = NULL;
@@ -1298,13 +1362,13 @@ io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ios_inode_ctx_get (inode, this, &iosstat);
if (iosstat) {
BUMP_STATS (iosstat, IOS_STATS_TYPE_WRITE);
- BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
inode = NULL;
iosstat = NULL;
}
}
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
@@ -1314,7 +1378,7 @@ io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
inode_t *inode = frame->local;
@@ -1330,17 +1394,17 @@ io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
iosstat = NULL;
}
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
+/* readdir callback: runs UPDATE_PROFILE_STATS for READDIR, then unwinds the
+ * frame with op_ret, op_errno, buf and xdata passed through unchanged.
+ * Always returns 0. */
int
io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, READDIR);
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
@@ -1348,10 +1412,10 @@ io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+/* fsync callback: runs UPDATE_PROFILE_STATS for FSYNC, then unwinds the
+ * frame with op_ret, op_errno, prebuf, postbuf and xdata passed through
+ * unchanged. Always returns 0. */
int
io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSYNC);
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
@@ -1359,10 +1423,10 @@ io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+/* setattr callback: runs UPDATE_PROFILE_STATS for SETATTR, then unwinds the
+ * frame with op_ret, op_errno, preop, postop and xdata passed through
+ * unchanged. Always returns 0. */
int
io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SETATTR);
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
return 0;
}
@@ -1370,11 +1434,11 @@ io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+/* unlink callback: runs UPDATE_PROFILE_STATS for UNLINK, then unwinds the
+ * frame with op_ret, op_errno, preparent, postparent and xdata passed
+ * through unchanged. Always returns 0. */
int
io_stats_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, UNLINK);
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1384,12 +1448,12 @@ int
io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, RENAME);
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent,
- prenewparent, postnewparent);
+ prenewparent, postnewparent, xdata);
return 0;
}
@@ -1397,10 +1461,10 @@ io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+/* readlink callback: runs UPDATE_PROFILE_STATS for READLINK, then unwinds
+ * the frame with op_ret, op_errno, buf, sbuf and xdata passed through
+ * unchanged. Always returns 0. */
int
io_stats_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *buf,
- struct iatt *sbuf)
+ struct iatt *sbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, READLINK);
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
return 0;
}
@@ -1409,10 +1473,10 @@ int
io_stats_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
UPDATE_PROFILE_STATS (frame, LOOKUP);
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xattr,
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
}
@@ -1422,11 +1486,11 @@ int
io_stats_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SYMLINK);
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1435,11 +1499,11 @@ int
io_stats_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, MKNOD);
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1448,7 +1512,8 @@ int
io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
char *path = frame->local;
@@ -1466,8 +1531,11 @@ io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
unwind:
+ /* local is assigned with path */
+ GF_FREE (frame->local);
+ frame->local = NULL;
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1476,28 +1544,28 @@ int
io_stats_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, LINK);
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
io_stats_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FLUSH);
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
int ret = -1;
@@ -1513,7 +1581,7 @@ io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
BUMP_STATS (iosstat, IOS_STATS_TYPE_OPENDIR);
unwind:
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -1521,13 +1589,13 @@ unwind:
int
io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, RMDIR);
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1535,71 +1603,100 @@ io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, TRUNCATE);
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
int
io_stats_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, STATFS);
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
io_stats_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SETXATTR);
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, GETXATTR);
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, REMOVEXATTR);
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+io_stats_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FSETXATTR);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FGETXATTR);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FREMOVEXATTR);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSYNCDIR);
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, ACCESS);
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -1607,92 +1704,126 @@ io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FTRUNCATE);
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
int
io_stats_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSTAT);
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
+io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, FALLOCATE);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+
+int
+io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, DISCARD);
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, ZEROFILL);
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, LK);
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
int
io_stats_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, ENTRYLK);
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, XATTROP);
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FXATTROP);
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, INODELK);
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
int
io_stats_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
@@ -1700,128 +1831,122 @@ io_stats_inodelk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
int
io_stats_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FINODELK);
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
-io_stats_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+io_stats_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
int
-io_stats_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+io_stats_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
-
+ loc, flags, dict, xdata);
return 0;
}
int
-io_stats_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+io_stats_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
-
+ fd, flags, dict, xdata);
return 0;
}
int
io_stats_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
+ loc, xdata);
return 0;
}
int
-io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
-
+ loc, xdata);
return 0;
}
int
io_stats_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
-
+ loc, size, xdata);
return 0;
}
int
-io_stats_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
+io_stats_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev, params);
-
+ loc, mode, dev, umask, xdata);
return 0;
}
int
io_stats_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -1830,117 +1955,112 @@ io_stats_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
int
io_stats_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
int
io_stats_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
-
+ loc, flags, xdata);
return 0;
}
int
-io_stats_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+io_stats_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
-
+ linkpath, loc, umask, xdata);
return 0;
}
int
io_stats_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
-
+ oldloc, newloc, xdata);
return 0;
}
int
io_stats_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
int
io_stats_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *stbuf, int32_t valid)
+ loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
-
+ loc, stbuf, valid, xdata);
return 0;
}
int
io_stats_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
-
+ loc, offset, xdata);
return 0;
}
int
-io_stats_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd, int32_t wbflags)
+io_stats_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -1949,7 +2069,7 @@ io_stats_open (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
@@ -1957,7 +2077,7 @@ io_stats_open (call_frame_t *frame, xlator_t *this,
int
io_stats_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -1966,14 +2086,14 @@ io_stats_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
int
io_stats_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
frame->local = fd;
@@ -1982,7 +2102,7 @@ io_stats_readv (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -1991,7 +2111,7 @@ int
io_stats_writev (call_frame_t *frame, xlator_t *this,
fd_t *fd, struct iovec *vector,
int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int len = 0;
@@ -1999,14 +2119,13 @@ io_stats_writev (call_frame_t *frame, xlator_t *this,
frame->local = fd->inode;
len = iov_length (vector, count);
-
BUMP_WRITE (fd, len);
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
@@ -2014,47 +2133,47 @@ io_stats_writev (call_frame_t *frame, xlator_t *this,
int
io_stats_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
+ loc, xdata);
return 0;
}
int
io_stats_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
int
io_stats_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags)
+ fd_t *fd, int32_t flags, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
+ fd, flags, xdata);
return 0;
}
-void
+int
conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
{
struct {
@@ -2063,16 +2182,12 @@ conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
const char *path;
} *stub;
xlator_t *this = NULL;
- inode_t *inode = NULL;
- const char *path = NULL;
char *filename = NULL;
FILE *logfp = NULL;
struct ios_dump_args args = {0};
stub = data;
this = stub->this;
- inode = stub->inode;
- path = stub->path;
filename = alloca (value->len + 1);
memset (filename, 0, value->len + 1);
@@ -2080,25 +2195,29 @@ conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
if (fnmatch ("*io*stat*dump", key, 0) == 0) {
+ if (!strncmp (filename, "", 1)) {
+ gf_log (this->name, GF_LOG_ERROR, "No filename given");
+ return -1;
+ }
logfp = fopen (filename, "w+");
- GF_ASSERT (logfp);
if (!logfp) {
gf_log (this->name, GF_LOG_ERROR, "failed to open %s "
"for writing", filename);
- return;
+ return -1;
}
(void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE,
logfp);
io_stats_dump (this, &args);
fclose (logfp);
}
+ return 0;
}
int
io_stats_setxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
struct {
xlator_t *this;
@@ -2117,43 +2236,85 @@ io_stats_setxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
int
io_stats_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
int
io_stats_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
+ loc, name, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+int
+io_stats_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
return 0;
}
int
io_stats_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
@@ -2161,13 +2322,13 @@ io_stats_opendir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
frame->local = fd->inode;
START_FOP_LATENCY (frame);
@@ -2175,108 +2336,144 @@ io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
STACK_WIND (frame, io_stats_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset);
-
+ fd, size, offset, dict);
return 0;
}
int
io_stats_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, offset);
-
+ fd, size, offset, xdata);
return 0;
}
int
io_stats_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
int
io_stats_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
}
int
io_stats_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
-
+ fd, offset, xdata);
return 0;
}
int
io_stats_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid)
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
int
io_stats_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
+}
+
+
+int
+io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int
+io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
return 0;
}
int
io_stats_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
+ fd, cmd, lock, xdata);
return 0;
}
@@ -2293,7 +2490,7 @@ io_stats_release (xlator_t *this, fd_t *fd)
LOCK (&conf->lock);
{
- conf->cumulative.nr_opens--;
+ conf->cumulative.nr_opens--;
}
UNLOCK (&conf->lock);
@@ -2301,8 +2498,7 @@ io_stats_release (xlator_t *this, fd_t *fd)
if (iosfd) {
io_stats_dump_fd (this, iosfd);
- if (iosfd->filename)
- GF_FREE (iosfd->filename);
+ GF_FREE (iosfd->filename);
GF_FREE (iosfd);
}
@@ -2327,141 +2523,137 @@ io_stats_forget (xlator_t *this, inode_t *inode)
return 0;
}
-int
-iostats_configure_options (xlator_t *this, dict_t *xl_options,
- struct ios_conf *conf)
+static int
+ios_init_top_stats (struct ios_conf *conf)
{
- int ret = 0;
- int sys_log_level = -1;
- char *sys_log_str = NULL;
- char *log_str = NULL;
- char *def_val = NULL;
- gf_boolean_t def_bool = _gf_false;
+ int i = 0;
- GF_ASSERT (this);
- GF_ASSERT (xl_options);
GF_ASSERT (conf);
- if (xlator_get_volopt_info (&this->volume_options, "dump-fd-stats", &def_val,
- NULL)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- " dump-fd-stats not found");
- ret = -1;
- goto out;
- } else {
- if (gf_string2boolean (def_val, &def_bool)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- "dump-fd-stats corrupt");
- ret = -1;
- goto out;
- }
- }
+ for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
+ conf->list[i].iosstats = GF_CALLOC (1,
+ sizeof(*conf->list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
- ret = dict_get_str_boolean (xl_options, "dump-fd-stats", def_bool);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'dump-fd-stats' takes only boolean arguments");
- } else {
- conf->dump_fd_stats = ret;
- if (conf->dump_fd_stats)
- gf_log (this->name, GF_LOG_DEBUG, "enabling dump-fd-stats");
- else
- gf_log (this->name, GF_LOG_DEBUG, "disabling dump-fd-stats");
- }
+ if (!conf->list[i].iosstats)
+ return -1;
- ret = dict_get_str_boolean (xl_options, "count-fop-hits", _gf_false);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'count-fop-hits' takes only boolean arguments");
- } else {
- conf->count_fop_hits = ret;
- if (conf->count_fop_hits)
- gf_log (this->name, GF_LOG_DEBUG,
- "enabling count-fop-hits");
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "disabling count-fop-hits");
+ INIT_LIST_HEAD(&conf->list[i].iosstats->list);
+ LOCK_INIT (&conf->list[i].lock);
}
- if (xlator_get_volopt_info (&this->volume_options, "latency-measurement",
- &def_val, NULL)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- "latency-measurement not found");
- ret = -1;
- goto out;
- } else {
- if (gf_string2boolean (def_val, &def_bool)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- "latency-measurement corrupt");
- ret = -1;
- goto out;
- }
- }
+ for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
+ conf->thru_list[i].iosstats = GF_CALLOC (1,
+ sizeof (*conf->thru_list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
- ret = dict_get_str_boolean (xl_options, "latency-measurement",
- def_bool);
- if (ret != -1) {
- if (conf->measure_latency != ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "changing latency measurement from %d to %d",
- conf->measure_latency, ret);
- }
- conf->measure_latency = ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "'latency-measurement' takes only boolean arguments");
- }
+ if (!conf->thru_list[i].iosstats)
+ return -1;
- ret = dict_get_str (xl_options, "log-level", &log_str);
- if (!ret) {
- if (!is_gf_log_command(this, "trusted.glusterfs.set-log-level",
- log_str)) {
- gf_log (this->name, GF_LOG_INFO,
- "changing log-level to %s", log_str);
- }
+ INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
+ LOCK_INIT (&conf->thru_list[i].lock);
}
- ret = dict_get_str (xl_options, "sys-log-level", &sys_log_str);
- if (!ret) {
- sys_log_level = glusterd_check_log_level (sys_log_str);
+ return 0;
+}
+
+static void
+ios_destroy_top_stats (struct ios_conf *conf)
+{
+ int i = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *list = NULL;
+ struct ios_stat *stat = NULL;
+
+ GF_ASSERT (conf);
+
+ LOCK (&conf->lock);
+
+ conf->cumulative.nr_opens = 0;
+ conf->cumulative.max_nr_opens = 0;
+ conf->cumulative.max_openfd_time.tv_sec = 0;
+ conf->cumulative.max_openfd_time.tv_usec = 0;
+
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
+ list_head = &conf->list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
}
- if (ret < 0 || sys_log_level == -1) {
- sys_log_level = glusterd_check_log_level ("CRITICAL");
- gf_log (this->name, GF_LOG_WARNING,
- "setting sys-log-level to CRITICAL");
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "setting sys-log-level to %s", sys_log_str);
+ for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
+ list_head = &conf->thru_list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
}
- set_sys_log_level (sys_log_level);
+ UNLOCK (&conf->lock);
- ret = 0;
- out:
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return;
}
int
reconfigure (xlator_t *this, dict_t *options)
{
struct ios_conf *conf = NULL;
- glusterfs_ctx_t *ctx = NULL;
+ int ret = -1;
+ char *sys_log_str = NULL;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
if (!this || !this->private)
- return -1;
+ goto out;
conf = this->private;
- iostats_configure_options (this, options, conf);
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- return -1;
+ GF_OPTION_RECONF ("dump-fd-stats", conf->dump_fd_stats, options, bool,
+ out);
- return 0;
+ GF_OPTION_RECONF ("count-fop-hits", conf->count_fop_hits, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("latency-measurement", conf->measure_latency,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level (sys_log_str);
+ set_sys_log_level (sys_log_level);
+ }
+
+ GF_OPTION_RECONF ("log-level", log_str, options, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level (log_str);
+ gf_log_set_loglevel (log_level);
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
}
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -2484,16 +2676,19 @@ mem_acct_init (xlator_t *this)
int
init (xlator_t *this)
{
- dict_t *options = NULL;
struct ios_conf *conf = NULL;
- int i = 0;
+ char *sys_log_str = NULL;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+ int ret = -1;
if (!this)
return -1;
- if (!this->children || this->children->next) {
+ if (!this->children) {
gf_log (this->name, GF_LOG_ERROR,
- "io_stats translator requires one subvolume");
+ "io_stats translator requires atleast one subvolume");
return -1;
}
@@ -2505,8 +2700,6 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- options = this->options;
-
conf = GF_CALLOC (1, sizeof(*conf), gf_io_stats_mt_ios_conf);
if (!conf) {
@@ -2520,40 +2713,33 @@ init (xlator_t *this)
gettimeofday (&conf->cumulative.started_at, NULL);
gettimeofday (&conf->incremental.started_at, NULL);
- for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
- conf->list[i].iosstats = GF_CALLOC (1,
- sizeof(*conf->list[i].iosstats),
- gf_io_stats_mt_ios_stat);
+ ret = ios_init_top_stats (conf);
+ if (ret)
+ return -1;
- if (!conf->list[i].iosstats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
+ GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out);
- INIT_LIST_HEAD(&conf->list[i].iosstats->list);
- LOCK_INIT (&conf->list[i].lock);
- }
+ GF_OPTION_INIT ("count-fop-hits", conf->count_fop_hits, bool, out);
- for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
- conf->thru_list[i].iosstats = GF_CALLOC (1,
- sizeof (*conf->thru_list[i].iosstats),
- gf_io_stats_mt_ios_stat);
+ GF_OPTION_INIT ("latency-measurement", conf->measure_latency,
+ bool, out);
- if (!conf->thru_list[i].iosstats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
+ GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level (sys_log_str);
+ set_sys_log_level (sys_log_level);
+ }
- INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
- LOCK_INIT (&conf->thru_list[i].lock);
+ GF_OPTION_INIT ("log-level", log_str, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level (log_str);
+ gf_log_set_loglevel (log_level);
}
- iostats_configure_options (this, options, conf);
this->private = conf;
-
- return 0;
+ ret = 0;
+out:
+ return ret;
}
@@ -2571,6 +2757,8 @@ fini (xlator_t *this)
return;
this->private = NULL;
+ ios_destroy_top_stats (conf);
+
GF_FREE(conf);
gf_log (this->name, GF_LOG_INFO,
@@ -2579,34 +2767,6 @@ fini (xlator_t *this)
}
int
-validate_options (xlator_t *this, char **op_errstr)
-{
- int ret = 0;
- volume_opt_list_t *vol_opt = NULL;
- volume_opt_list_t *tmp;
-
- if (!this) {
- gf_log (this->name, GF_LOG_DEBUG, "'this' not a valid ptr");
- ret =-1;
- goto out;
- }
-
- if (list_empty (&this->volume_options))
- goto out;
-
- vol_opt = list_entry (this->volume_options.next,
- volume_opt_list_t, list);
- list_for_each_entry_safe (vol_opt, tmp, &this->volume_options, list) {
- ret = validate_xlator_volume_options_attacherr (this,
- vol_opt->given_opt,
- op_errstr);
- }
-
-out:
-
- return ret;
-}
-int
notify (xlator_t *this, int32_t event, void *data, ...)
{
int ret = 0;
@@ -2625,6 +2785,28 @@ notify (xlator_t *this, int32_t event, void *data, ...)
va_end (ap);
switch (event) {
case GF_EVENT_TRANSLATOR_INFO:
+ ret = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+ if (ret) {
+ ret = dict_set_int32 (output, "top-op", top_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set top-op in dict");
+ goto out;
+ }
+ ios_destroy_top_stats (this->private);
+ ret = ios_init_top_stats (this->private);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to reset top stats");
+ ret = dict_set_int32 (output, "stats-cleared",
+ ret ? 0 : 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ goto out;
+ }
+
ret = dict_get_int32 (dict, "top-op", &top_op);
if (!ret) {
ret = dict_get_int32 (dict, "list-cnt", &list_cnt);
@@ -2688,6 +2870,9 @@ struct xlator_fops fops = {
.setxattr = io_stats_setxattr,
.getxattr = io_stats_getxattr,
.removexattr = io_stats_removexattr,
+ .fsetxattr = io_stats_fsetxattr,
+ .fgetxattr = io_stats_fgetxattr,
+ .fremovexattr = io_stats_fremovexattr,
.opendir = io_stats_opendir,
.readdir = io_stats_readdir,
.readdirp = io_stats_readdirp,
@@ -2705,6 +2890,9 @@ struct xlator_fops fops = {
.fxattrop = io_stats_fxattrop,
.setattr = io_stats_setattr,
.fsetattr = io_stats_fsetattr,
+ .fallocate = io_stats_fallocate,
+ .discard = io_stats_discard,
+ .zerofill = io_stats_zerofill,
};
struct xlator_cbks cbks = {
@@ -2748,7 +2936,7 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.default_value = "CRITICAL",
.description = "Gluster's syslog log-level",
- .value = { "WARNING", "ERROR", "CRITICAL"}
+ .value = { "WARNING", "ERROR", "INFO", "CRITICAL"}
},
{ .key = {"brick-log-level"},
.type = GF_OPTION_TYPE_STR,
diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am
index 0f1679a04..7b2597b4d 100644
--- a/xlators/debug/trace/src/Makefile.am
+++ b/xlators/debug/trace/src/Makefile.am
@@ -2,13 +2,15 @@
xlator_LTLIBRARIES = trace.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-trace_la_LDFLAGS = -module -avoidversion
+trace_la_LDFLAGS = -module -avoid-version
trace_la_SOURCES = trace.c
trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = trace.h trace-mem-types.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
new file mode 100644
index 000000000..9fa7d97c2
--- /dev/null
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __TRACE_MEM_TYPES_H__
+#define __TRACE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_trace_mem_types_ {
+ gf_trace_mt_trace_conf_t = gf_common_mt_end + 1,
+ gf_trace_mt_end
+};
+#endif
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 943009daf..c9d839356 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -1,26 +1,15 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "trace.h"
+#include "trace-mem-types.h"
/**
* xlators/debug/trace :
@@ -29,1203 +18,1388 @@
* Very helpful translator for debugging.
*/
-#include <time.h>
-#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
-
-
-struct {
- char *name;
- int enabled;
-} trace_fop_names[GF_FOP_MAXVALUE];
-
-int trace_log_level = GF_LOG_INFO;
-
-static char *
-trace_stat_to_str (struct iatt *buf)
+int
+dump_history_trace (circular_buffer_t *cb, void *data)
{
- char *statstr = NULL;
- char atime_buf[256] = {0,};
- char mtime_buf[256] = {0,};
- char ctime_buf[256] = {0,};
- int asprint_ret_value = 0;
- uint64_t ia_time = 0;
-
- if (!buf) {
- statstr = NULL;
- goto out;
- }
-
- ia_time = buf->ia_atime;
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
-
- ia_time = buf->ia_mtime;
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
+ char *string = NULL;
+ struct tm *tm = NULL;
+ char timestr[256] = {0,};
- ia_time = buf->ia_ctime;
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
+ string = (char *)cb->data;
+ tm = localtime (&cb->tv.tv_sec);
- asprint_ret_value = gf_asprintf (&statstr,
- "gfid=%s ino=%"PRIu64", mode=%o, "
- "nlink=%"GF_PRI_NLINK", uid=%u, "
- "gid=%u, size=%"PRIu64", "
- "blocks=%"PRIu64", atime=%s, "
- "mtime=%s, ctime=%s",
- uuid_utoa (buf->ia_gfid), buf->ia_ino,
- st_mode_from_ia (buf->ia_prot,
- buf->ia_type),
- buf->ia_nlink, buf->ia_uid,
- buf->ia_gid, buf->ia_size,
- buf->ia_blocks, atime_buf,
- mtime_buf, ctime_buf);
+ /* Since we are continuing with adding entries to the buffer even when
+ gettimeofday () fails, it's safe to check tm and then dump the time
+ at which the entry was added to the buffer */
+ if (tm) {
+ strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_proc_dump_write ("TIME", "%s", timestr);
+ }
- if (asprint_ret_value < 0)
- statstr = NULL;
+ gf_proc_dump_write ("FOP", "%s\n", string);
-out:
- return statstr;
+ return 0;
}
-
int
trace_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
- "*stbuf {%s}, *preparent {%s}, *postparent = "
- "{%s})", frame->root->unique,
- uuid_utoa (inode->gfid), op_ret, fd,
- statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (preparentstr)
- GF_FREE (preparentstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
+ "*stbuf {%s}, *preparent {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid), op_ret, fd,
+ statstr, preparentstr, postparentstr);
/* for 'release' log */
fd_ctx_set (fd, this, 0);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, *fd=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, "
+ "*fd=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ fd);
+
+ LOG_ELEMENT (conf, string);
}
+out:
/* for 'release' log */
if (op_ret >= 0)
fd_ctx_set (fd, this, 0);
- frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ TRACE_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
int
trace_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STAT].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *buf, struct iobref *iobref)
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- buf, iobref);
+out:
+ TRACE_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+ buf, iobref, xdata);
return 0;
}
-
int
trace_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
-
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
-
int
trace_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s}",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
int
trace_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_SETATTR].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (statpre);
- postopstr = trace_stat_to_str (statpost);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, statpost);
+out:
+ TRACE_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
return 0;
}
-
int
trace_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (statpre);
- postopstr = trace_stat_to_str (statpost);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
- statpre, statpost);
+out:
+ TRACE_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
return 0;
}
-
int
trace_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (frame->local), op_ret, preparentstr,
- postparentstr);
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ " *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preoldparentstr = NULL;
- char *postoldparentstr = NULL;
- char *prenewparentstr = NULL;
- char *postnewparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preoldparentstr = trace_stat_to_str (preoldparent);
- postoldparentstr = trace_stat_to_str (postoldparent);
-
- prenewparentstr = trace_stat_to_str (prenewparent);
- postnewparentstr = trace_stat_to_str (postnewparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- "*preoldparent = {%s}, *postoldparent = {%s}"
- " *prenewparent = {%s}, *postnewparent = {%s})",
- frame->root->unique, op_ret, statstr,
- preoldparentstr, postoldparentstr,
- prenewparentstr, postnewparentstr);
+ char statstr[4096] = {0, };
+ char preoldparentstr[4096] = {0, };
+ char postoldparentstr[4096] = {0, };
+ char prenewparentstr[4096] = {0, };
+ char postnewparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preoldparentstr)
- GF_FREE (preoldparentstr);
+ conf = this->private;
- if (postoldparentstr)
- GF_FREE (postoldparentstr);
-
- if (prenewparentstr)
- GF_FREE (prenewparentstr);
-
- if (postnewparentstr)
- GF_FREE (postnewparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preoldparent, preoldparentstr);
+ trace_stat_to_str (postoldparent, postoldparentstr);
+ trace_stat_to_str (prenewparent, prenewparentstr);
+ trace_stat_to_str (postnewparent, postnewparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *preoldparent = {%s},"
+ " *postoldparent = {%s}"
+ " *prenewparent = {%s}, "
+ "*postnewparent = {%s})",
+ frame->root->unique, op_ret, statstr,
+ preoldparentstr, postoldparentstr,
+ prenewparentstr, postnewparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
+out:
+ TRACE_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
return 0;
}
-
int
trace_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *stbuf)
+ const char *buf, struct iatt *stbuf, dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {0,};
if (op_ret == 0) {
- statstr = trace_stat_to_str (stbuf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s, "
- "stbuf = { %s })",
- frame->root->unique, op_ret, op_errno, buf,
- statstr);
- } else
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ trace_stat_to_str (stbuf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d, "
+ "buf=%s, stbuf = { %s })",
+ frame->root->unique, op_ret, op_errno,
+ buf, statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
- if (statstr)
- GF_FREE (statstr);
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, stbuf);
+out:
+ TRACE_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, stbuf,
+ xdata);
return 0;
}
-
int
trace_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
- char *statstr = NULL;
- char *postparentstr = NULL;
+ char statstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*buf {%s}, *postparent {%s}",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (postparent, postparentstr);
+ /* print buf->ia_gfid instead of inode->gfid,
+ * since if the inode is not yet linked to the
+ * inode table (fresh lookup) then null gfid
+ * will be printed.
+ */
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*buf {%s}, *postparent {%s}",
+ frame->root->unique,
+ uuid_utoa (buf->ia_gfid),
+ op_ret, statstr, postparentstr);
/* For 'forget' */
inode_ctx_put (inode, this, 0);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- xattr, postparent);
+out:
+ TRACE_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
return 0;
}
-
int
trace_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (postparentstr)
- GF_FREE (postparentstr);
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, op_errno=%d",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- ", *stbuf = {%s}, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ ", *stbuf = {%s}, *prebuf = {%s}, "
+ "*postbuf = {%s} )",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- " *prebuf = {%s}, *postbuf = {%s})",
- frame->root->unique, op_ret,
- statstr, preparentstr, postparentstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (statstr)
- GF_FREE (statstr);
+ conf = this->private;
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *prebuf = {%s},"
+ " *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ statstr, preparentstr, postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
 if (trace_fop_names[GF_FOP_FLUSH].enabled) {
+ char string[4096] = {0,};
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
 if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ char string[4096] = {0,};
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, fd=%p",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno, fd);
- }
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " fd=%p",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno, fd);
+ LOG_ELEMENT (conf, string);
+ }
+out:
/* for 'releasedir' log */
if (op_ret >= 0)
fd_ctx_set (fd, this, 0);
- frame->local = NULL;
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ TRACE_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
int
trace_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, preparentstr, postparentstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "*prebuf={%s}, *postbuf={%s}",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr, postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret, preopstr,
- postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
-
int
trace_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STATFS].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK
- ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", "
- "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%"
- GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d",
- frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks,
- buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree,
- buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": ({f_bsize=%lu, "
+ "f_frsize=%lu, "
+ "f_blocks=%"GF_PRI_FSBLK
+ ", f_bfree=%"GF_PRI_FSBLK", "
+ "f_bavail=%"GF_PRI_FSBLK", "
+ "f_files=%"GF_PRI_FSBLK", "
+ "f_ffree=%"GF_PRI_FSBLK", "
+ "f_favail=%"GF_PRI_FSBLK", "
+ "f_fsid=%lu, f_flag=%lu, "
+ "f_namemax=%lu}) => ret=%d",
+ frame->root->unique, buf->f_bsize,
+ buf->f_frsize, buf->f_blocks,
+ buf->f_bfree, buf->f_bavail,
+ buf->f_files, buf->f_ffree,
+ buf->f_favail, buf->f_fsid,
+ buf->f_flag, buf->f_namemax, op_ret);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, dict=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno, dict);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
- frame->local = NULL;
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
trace_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, dict=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno, dict);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
trace_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *prebufstr = NULL;
- char *postbufstr = NULL;
-
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- if (op_ret >= 0) {
- prebufstr = trace_stat_to_str (prebuf);
- postbufstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret,
- prebufstr, postbufstr);
+ char prebufstr[4096] = {0, };
+ char postbufstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (prebufstr)
- GF_FREE (prebufstr);
-
- if (postbufstr)
- GF_FREE (postbufstr);
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, prebufstr);
+ trace_stat_to_str (postbuf, postbufstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ prebufstr, postbufstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
int
trace_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d "
+ "buf=%s", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "{l_type=%d, l_whence=%d, "
+ "l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len,
+ lock->l_pid);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
- }
- frame->local = NULL;
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
-
-
int
trace_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
int
trace_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-
int
trace_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-
int
trace_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
}
- frame->local = NULL;
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum);
+out:
+ TRACE_STACK_UNWIND (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
return 0;
}
@@ -1235,35 +1409,54 @@ trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
trace_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (path=%s basename=%s, "
- "cmd=%s, type=%s)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- volume, loc->path, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ volume, loc->path, basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
-
int
trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- char *cmd_str = NULL;
- char *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {0,};
switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
@@ -1306,32 +1499,43 @@ trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
break;
}
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (path=%s "
- "cmd=%s, type=%s, start=%llu, len=%llu, pid=%llu)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- volume, loc->path,
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), volume,
+ loc->path, cmd_str, type_str,
+ (unsigned long long)flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
-
int
trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- char *cmd_str = NULL, *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {0,};
switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
@@ -1374,233 +1578,351 @@ trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
break;
}
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (fd =%p "
- "cmd=%s, type=%s, start=%llu, len=%llu, pid=%llu)",
- frame->root->unique, uuid_utoa (fd->inode->gfid), volume, fd,
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd =%p "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ cmd_str, type_str,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
frame->local = fd->inode->gfid;
- }
+ LOG_ELEMENT (conf, string);
+ }
+out:
STACK_WIND (frame, trace_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
-
int
trace_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (path=%s flags=%d)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (path=%s flags=%d)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
+ loc, flags, dict, xdata);
return 0;
}
-
int
trace_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, flags=%d",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
+ fd, flags, dict, xdata);
return 0;
}
-
int
trace_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
/* TODO: print all the keys mentioned in xattr_req */
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
+ loc, xdata);
return 0;
}
-
int
-trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STAT].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
+ loc, xdata);
return 0;
}
-
int
-trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, size=%"GF_PRI_SIZET")",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, size);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "size=%"GF_PRI_SIZET")", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ size);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
}
-
int
trace_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t dev, dict_t *params)
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%d dev=%"GF_PRI_DEV")",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mode, dev);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d "
+ "umask=0%o, dev=%"GF_PRI_DEV")",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask, dev);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev, params);
+ loc, mode, dev, umask, xdata);
return 0;
}
-
int
trace_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mode);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
-
int
-trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flag=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ xflag);
+
frame->local = loc->inode->gfid;
- }
+ LOG_ELEMENT (conf, string);
+ }
+out:
STACK_WIND (frame, trace_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
-
int
-trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
+ loc, flags, xdata);
return 0;
}
-
int
trace_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s linkpath=%s, path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- linkpath, loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s linkpath=%s, path=%s"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), linkpath,
+ loc->path, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
+ linkpath, loc, umask, xdata);
return 0;
}
-
int
-trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- char oldgfid[50] = {0,};
- char newgfid[50] = {0,};
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
if (newloc->inode)
uuid_utoa_r (newloc->inode->gfid, newgfid);
else
@@ -1608,29 +1930,40 @@ trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
uuid_utoa_r (oldloc->inode->gfid, oldgfid);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": oldgfid=%s oldpath=%s --> newgfid=%s newpath=%s",
- frame->root->unique, oldgfid, oldloc->path, newgfid, newloc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s",
+ frame->root->unique, oldgfid,
+ oldloc->path, newgfid, newloc->path);
frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
-
int
-trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- char oldgfid[50] = {0,};
- char newgfid[50] = {0,};
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LINK].enabled) {
+ char string[4096] = {0,};
if (newloc->inode)
uuid_utoa_r (newloc->inode->gfid, newgfid);
else
@@ -1638,42 +1971,63 @@ trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
uuid_utoa_r (oldloc->inode->gfid, oldgfid);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": oldgfid=%s oldpath=%s --> newgfid=%s newpath=%s",
- frame->root->unique, oldgfid, oldloc->path,
- newgfid, newloc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s", frame->root->unique,
+ oldgfid, oldloc->path, newgfid,
+ newloc->path);
+
frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
-
int
trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ia_time = 0;
- char actime_str[256] = {0,};
- char modtime_str[256] = {0,};
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%o)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%o)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s uid=%o, gid=%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, stbuf->ia_uid, stbuf->ia_gid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s uid=%o,"
+ " gid=%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, stbuf->ia_uid,
+ stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
@@ -1685,44 +2039,64 @@ trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
localtime ((time_t *)&ia_time));
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s ia_atime=%s, ia_mtime=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, actime_str, modtime_str);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
frame->local = loc->inode->gfid;
}
+out:
STACK_WIND (frame, trace_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
-
int
trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ia_time = 0;
- char actime_str[256] = {0,};
- char modtime_str[256] = {0,};
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, mode=%o",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, mode=%o",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, uid=%o, gid=%o",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, stbuf->ia_uid, stbuf->ia_gid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, uid=%o, "
+ "gid=%o", frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, stbuf->ia_uid, stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
@@ -1734,353 +2108,552 @@ trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
localtime ((time_t *)&ia_time));
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p ia_atime=%s, ia_mtime=%s",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, actime_str, modtime_str);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
frame->local = fd->inode->gfid;
}
+out:
STACK_WIND (frame, trace_fsetattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
-
int
trace_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, offset=%"PRId64"",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "offset=%"PRId64"", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ offset);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
+ loc, offset, xdata);
return 0;
}
-
int
trace_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d fd=%p wbflags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags, fd, wbflags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags, fd);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
-
int
trace_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, fd=%p, flags=0%o mode=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, fd, flags, mode);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, fd=%p, "
+ "flags=0%o mode=0%o umask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ fd, flags, mode, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
-
int
trace_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READ].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"
+ GF_PRI_SIZET"offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
-
int
trace_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t offset, struct iobref *iobref)
+ off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_WRITE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, count=%d, offset=%"PRId64")",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, count, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, count=%d, "
+ " offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, count,
+ offset, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
-
int
-trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STATFS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, (loc->inode)?
- uuid_utoa (loc->inode->gfid):"0", loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique, (loc->inode)?
+ uuid_utoa (loc->inode->gfid):"0", loc->path);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
+ loc, xdata);
return 0;
}
-
int
-trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
-
int
-trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s flags=%d fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), flags, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), flags, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
+ fd, flags, xdata);
return 0;
}
-
int
trace_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
-
int
trace_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s name=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
-
int
trace_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s name=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
-
int
-trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s fd=%p",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path, fd);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
trace_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64,
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64" dict=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, dict);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset);
+ fd, size, offset, dict);
return 0;
}
-
int
trace_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64,
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64,
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, offset);
+ fd, size, offset, xdata);
return 0;
}
-
int
trace_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s datasync=%d fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- datasync, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s datasync=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), datasync, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
-
int
-trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mask=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mask);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, mask);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
}
-
int32_t
trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len)
+ int32_t len, dict_t *xdata)
{
+
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s offset=%"PRId64" len=%u fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- offset, len, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64
+ "len=%u fd=%p", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, len, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rchecksum_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rchecksum,
- fd, offset, len);
+ fd, offset, len, xdata);
return 0;
@@ -2089,163 +2662,264 @@ trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int32_t
trace_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
+ entrylk_type type, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (fd=%p basename=%s, "
- "cmd=%s, type=%s)",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- volume, fd, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd=%p "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fentrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
return 0;
}
int32_t
trace_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p name=%s",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p name=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, name);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fgetxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr,
- fd, name);
+ fd, name, xdata);
return 0;
}
int32_t
trace_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p flags=%d",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsetxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags);
+ fd, dict, flags, xdata);
return 0;
}
int
trace_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- offset, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
+ fd, offset, xdata);
return 0;
}
-
int
-trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
return 0;
}
-
int
trace_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *lock)
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd,
- cmd, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, cmd=%d, "
+ "lock {l_type=%d, "
+ "l_whence=%d, l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, cmd,
+ lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len, lock->l_pid);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
+ fd, cmd, lock, xdata);
return 0;
}
int32_t
trace_forget (xlator_t *this, inode_t *inode)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
/* If user want to understand when a lookup happens,
he should know about 'forget' too */
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s ino=%"PRIu64,
- uuid_utoa (inode->gfid), inode->ino);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s", uuid_utoa (inode->gfid));
+
+ LOG_ELEMENT (conf, string);
}
+
+out:
return 0;
}
-
int32_t
trace_releasedir (xlator_t *this, fd_t *fd)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s fd=%p", uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
}
+out:
return 0;
}
int32_t
trace_release (xlator_t *this, fd_t *fd)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled ||
trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s fd=%p", uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
}
+
+out:
return 0;
}
-
void
enable_all_calls (int enabled)
{
@@ -2255,7 +2929,6 @@ enable_all_calls (int enabled)
trace_fop_names[i].enabled = enabled;
}
-
void
enable_call (const char *name, int enabled)
{
@@ -2283,6 +2956,105 @@ process_call_list (const char *list, int include)
}
}
+int32_t
+trace_dump_history (xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
+ trace_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("trace", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->history, out);
+
+ conf = this->private;
+ // Is it ok to return silently if log-history option is off?
+ if (conf && conf->log_history == _gf_true) {
+ gf_proc_dump_build_key (key_prefix, "xlator.debug.trace",
+ "history");
+ gf_proc_dump_add_section (key_prefix);
+ eh_dump (this->history, NULL, dump_history_trace);
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_trace_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ trace_conf_t *conf = NULL;
+ char *includes = NULL, *excludes = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+
+ conf = this->private;
+
+ includes = data_to_str (dict_get (options, "include-ops"));
+ excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
+ trace_fop_names[i].enabled = 1;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ goto out;
+ }
+
+ if (includes)
+ process_call_list (includes, 1);
+ if (excludes)
+ process_call_list (excludes, 0);
+
+ /* Should resizing of the event-history be allowed in reconfigure?
+ * for which a new event_history might have to be allocated and the
+ * older history has to be freed.
+ */
+ GF_OPTION_RECONF ("log-file", conf->log_file, options, bool, out);
+
+ GF_OPTION_RECONF ("log-history", conf->log_history, options, bool, out);
+
+ ret = 0;
+
+out:
+ return ret;
+}
int32_t
init (xlator_t *this)
@@ -2290,6 +3062,10 @@ init (xlator_t *this)
dict_t *options = NULL;
char *includes = NULL, *excludes = NULL;
char *forced_loglevel = NULL;
+ eh_t *history = NULL;
+ int ret = -1;
+ size_t history_size = TRACE_DEFAULT_HISTORY_SIZE;
+ trace_conf_t *conf = NULL;
if (!this)
return -1;
@@ -2304,6 +3080,12 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
+ conf = GF_CALLOC (1, sizeof (trace_conf_t), gf_trace_mt_trace_conf_t);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot allocate "
+ "xl->private");
+ return -1;
+ }
options = this->options;
includes = data_to_str (dict_get (options, "include-ops"));
@@ -2312,8 +3094,13 @@ init (xlator_t *this)
{
int i;
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- trace_fop_names[i].name = (gf_fop_list[i] ?
- gf_fop_list[i] : ":O");
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
trace_fop_names[i].enabled = 1;
}
}
@@ -2321,14 +3108,43 @@ init (xlator_t *this)
if (includes && excludes) {
gf_log (this->name,
GF_LOG_ERROR,
- "must specify only one of 'include-ops' and 'exclude-ops'");
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
return -1;
}
+
if (includes)
process_call_list (includes, 1);
if (excludes)
process_call_list (excludes, 0);
+
+ GF_OPTION_INIT ("history-size", conf->history_size, size, out);
+
+ gf_log (this->name, GF_LOG_INFO, "history size %"GF_PRI_SIZET,
+ history_size);
+
+ GF_OPTION_INIT ("log-file", conf->log_file, bool, out);
+
+ gf_log (this->name, GF_LOG_INFO, "logging to file %s",
+ (conf->log_file == _gf_true)?"enabled":"disabled");
+
+ GF_OPTION_INIT ("log-history", conf->log_history, bool, out);
+
+ gf_log (this->name, GF_LOG_DEBUG, "logging to history %s",
+ (conf->log_history == _gf_true)?"enabled":"disabled");
+
+ history = eh_new (history_size, _gf_false, NULL);
+ if (!history) {
+ gf_log (this->name, GF_LOG_ERROR, "event history cannot be "
+ "initialized");
+ return -1;
+ }
+
+ this->history = history;
+
+ conf->trace_log_level = GF_LOG_INFO;
+
if (dict_get (options, "force-log-level")) {
forced_loglevel = data_to_str (dict_get (options,
"force-log-level"));
@@ -2336,25 +3152,34 @@ init (xlator_t *this)
goto setloglevel;
if (strcmp (forced_loglevel, "INFO") == 0)
- trace_log_level = GF_LOG_INFO;
+ conf->trace_log_level = GF_LOG_INFO;
else if (strcmp (forced_loglevel, "TRACE") == 0)
- trace_log_level = GF_LOG_TRACE;
+ conf->trace_log_level = GF_LOG_TRACE;
else if (strcmp (forced_loglevel, "ERROR") == 0)
- trace_log_level = GF_LOG_ERROR;
+ conf->trace_log_level = GF_LOG_ERROR;
else if (strcmp (forced_loglevel, "DEBUG") == 0)
- trace_log_level = GF_LOG_DEBUG;
+ conf->trace_log_level = GF_LOG_DEBUG;
else if (strcmp (forced_loglevel, "WARNING") == 0)
- trace_log_level = GF_LOG_WARNING;
+ conf->trace_log_level = GF_LOG_WARNING;
else if (strcmp (forced_loglevel, "CRITICAL") == 0)
- trace_log_level = GF_LOG_CRITICAL;
+ conf->trace_log_level = GF_LOG_CRITICAL;
else if (strcmp (forced_loglevel, "NONE") == 0)
- trace_log_level = GF_LOG_NONE;
+ conf->trace_log_level = GF_LOG_NONE;
}
setloglevel:
- gf_log_set_loglevel (trace_log_level);
+ gf_log_set_loglevel (conf->trace_log_level);
+ this->private = conf;
+ ret = 0;
+out:
+ if (ret == -1) {
+ if (history)
+ GF_FREE (history);
+ if (conf)
+ GF_FREE (conf);
+ }
- return 0;
+ return ret;
}
void
@@ -2363,6 +3188,9 @@ fini (xlator_t *this)
if (!this)
return;
+ if (this->history)
+ eh_destroy (this->history);
+
gf_log (this->name, GF_LOG_INFO,
"trace translator unloaded");
return;
@@ -2411,7 +3239,6 @@ struct xlator_fops fops = {
.fsetattr = trace_fsetattr,
};
-
struct xlator_cbks cbks = {
.release = trace_release,
.releasedir = trace_releasedir,
@@ -2427,5 +3254,21 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR
/*.value = { ""} */
},
+ { .key = {"history-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "1024",
+ },
+ { .key = {"log-file"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ { .key = {"log-history"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
{ .key = {NULL} },
};
+
+struct xlator_dumpops dumpops = {
+ .history = trace_dump_history
+};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
new file mode 100644
index 000000000..045eefb36
--- /dev/null
+++ b/xlators/debug/trace/src/trace.h
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <time.h>
+#include <errno.h>
+#include "glusterfs.h"
+#include "xlator.h"
+#include "common-utils.h"
+#include "event-history.h"
+#include "logging.h"
+#include "circ-buff.h"
+#include "statedump.h"
+#include "options.h"
+
+#define TRACE_DEFAULT_HISTORY_SIZE 1024
+
+typedef struct {
+ /* Since the longest fop name is fremovexattr i.e 12 characters, array size
+ * is kept 24, i.e double of the maximum.
+ */
+ char name[24];
+ int enabled;
+} trace_fop_name_t;
+
+trace_fop_name_t trace_fop_names[GF_FOP_MAXVALUE];
+
+typedef struct {
+ gf_boolean_t log_file;
+ gf_boolean_t log_history;
+ size_t history_size;
+ int trace_log_level;
+} trace_conf_t;
+
+#define TRACE_STACK_UNWIND(op, frame, params ...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ } while (0);
+
+#define LOG_ELEMENT(_conf, _string) \
+ do { \
+ if (_conf) { \
+ if ((_conf->log_history) == _gf_true) \
+ gf_log_eh ("%s", _string); \
+ if ((_conf->log_file) == _gf_true) \
+ gf_log (THIS->name, _conf->trace_log_level, \
+ "%s", _string); \
+ } \
+ } while (0);
+
+#define trace_stat_to_str(buf, statstr) \
+ do { \
+ char atime_buf[256] = {0,}; \
+ char mtime_buf[256] = {0,}; \
+ char ctime_buf[256] = {0,}; \
+ uint64_t ia_time = 0; \
+ \
+ if (!buf) \
+ break; \
+ \
+ ia_time = buf->ia_atime; \
+ strftime (atime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_mtime; \
+ strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_ctime; \
+ strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ snprintf (statstr, sizeof (statstr), \
+ "gfid=%s ino=%"PRIu64", mode=%o, " \
+ "nlink=%"GF_PRI_NLINK", uid=%u, " \
+ "gid=%u, size=%"PRIu64", " \
+ "blocks=%"PRIu64", atime=%s, " \
+ "mtime=%s, ctime=%s", \
+ uuid_utoa (buf->ia_gfid), buf->ia_ino, \
+ st_mode_from_ia (buf->ia_prot, \
+ buf->ia_type), \
+ buf->ia_nlink, buf->ia_uid, \
+ buf->ia_gid, buf->ia_size, \
+ buf->ia_blocks, atime_buf, \
+ mtime_buf, ctime_buf); \
+ } while (0);
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
index 2cbde680f..36efc6698 100644
--- a/xlators/encryption/Makefile.am
+++ b/xlators/encryption/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = rot-13
+SUBDIRS = rot-13 crypt
CLEANFILES =
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/encryption/crypt/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/xlators/cluster/unify/Makefile.am
+++ b/xlators/encryption/crypt/Makefile.am
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
new file mode 100644
index 000000000..faadd117f
--- /dev/null
+++ b/xlators/encryption/crypt/src/Makefile.am
@@ -0,0 +1,24 @@
+if ENABLE_CRYPT_XLATOR
+
+xlator_LTLIBRARIES = crypt.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
+
+crypt_la_LDFLAGS = -module -avoidversion -lssl -lcrypto
+
+crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
+crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+else
+
+noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+endif \ No newline at end of file
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
new file mode 100644
index 000000000..1ec41495c
--- /dev/null
+++ b/xlators/encryption/crypt/src/atom.c
@@ -0,0 +1,962 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Glossary
+ *
+ *
+ * cblock (or cipher block). A logical unit in a file.
+ * cblock size is defined as the number of bits
+ * in an input (or output) block of the block
+ * cipher (*). Cipher block size is a property of
+ * cipher algorithm. E.g. cblock size is 64 bits
+ * for DES, 128 bits for AES, etc.
+ *
+ * atomic cipher A cipher algorithm, which requires some chunks of
+ * algorithm text to be padded at left and(or) right sides before
+ * cipher transform.
+ *
+ *
+ * block (atom) Minimal chunk of file's data, which doesn't require
+ * padding. We'll consider logical units in a file of
+ * block size (atom size).
+ *
+ * cipher algorithm Atomic cipher algorithm, which requires the last
+ * with EOF issue incomplete cblock in a file to be padded with some
+ * data (usually zeros).
+ *
+ *
+ * operation, which reading/writing from offset, which is not aligned to
+ * forms a gap at to atom size
+ * the beginning
+ *
+ *
+ * operation, which reading/writing count bytes starting from offset off,
+ * forms a gap at so that off+count is not aligned to atom_size
+ * the end
+ *
+ * head block the first atom affected by an operation, which forms
+ * a gap at the beginning, or(and) at the end.
+ * Comment: Head block has at least one gap (either at
+ * the beginning, or at the end)
+ *
+ *
+ * tail block the last atom different from head, affected by an
+ * operation, which forms a gap at the end.
+ * Comment: Tail block has exactly one gap (at the end).
+ *
+ *
+ * partial block head or tail block
+ *
+ *
+ * full block block without gaps.
+ *
+ *
+ * (*) Recommendation for Block Cipher Modes of Operation
+ * Methods and Techniques
+ * NIST Special Publication 800-38A Edition 2001
+ */
+
+/*
+ * atom->offset_at()
+ */
+/* File offset of the head atom: the aligned offset stored in @conf. */
+static off_t offset_at_head(struct avec_config *conf)
+{
+        off_t head_start = conf->aligned_offset;
+
+        return head_start;
+}
+
+/* atom->offset_at() for the head atom of the hole config; @object is unused. */
+static off_t offset_at_hole_head(call_frame_t *frame,
+                                 struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return offset_at_head(hole);
+}
+
+/* atom->offset_at() for the head atom of the data config; @object is unused. */
+static off_t offset_at_data_head(call_frame_t *frame,
+                                 struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return offset_at_head(data);
+}
+
+
+/*
+ * File offset of the tail atom: the aligned offset, plus one atom when
+ * the head has a gap at the beginning (off_in_head != 0), plus the
+ * bytes covered by all full blocks.
+ */
+static off_t offset_at_tail(struct avec_config *conf,
+                            struct object_cipher_info *object)
+{
+        off_t tail_start =
+                conf->aligned_offset +
+                (conf->off_in_head ? get_atom_size(object) : 0) +
+                (conf->nr_full_blocks << get_atom_bits(object));
+
+        return tail_start;
+}
+
+/* atom->offset_at() for the tail atom of the hole config. */
+static off_t offset_at_hole_tail(call_frame_t *frame,
+                                 struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return offset_at_tail(hole, object);
+}
+
+
+/* atom->offset_at() for the tail atom of the data config. */
+static off_t offset_at_data_tail(call_frame_t *frame,
+                                 struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return offset_at_tail(data, object);
+}
+
+/*
+ * File offset of the first full block: the aligned offset, plus one
+ * atom when the head has a gap at the beginning (off_in_head != 0).
+ */
+static off_t offset_at_full(struct avec_config *conf,
+                            struct object_cipher_info *object)
+{
+        off_t full_start =
+                conf->aligned_offset +
+                (conf->off_in_head ? get_atom_size(object) : 0);
+
+        return full_start;
+}
+
+/* atom->offset_at() for the full blocks of the data config. */
+static off_t offset_at_data_full(call_frame_t *frame,
+                                 struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return offset_at_full(data, object);
+}
+
+/* atom->offset_at() for the full blocks of the hole config. */
+static off_t offset_at_hole_full(call_frame_t *frame,
+                                 struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return offset_at_full(hole, object);
+}
+
+/*
+ * atom->io_size_nopad()
+ */
+
+/*
+ * Number of payload bytes in the head block, i.e. the atom size minus
+ * the gap at the beginning and (when the head is the only block and
+ * off_in_tail is non-zero) the gap at the end.
+ */
+static uint32_t io_size_nopad_head(struct avec_config *conf,
+                                   struct object_cipher_info *object)
+{
+        uint32_t leading_gap;
+        uint32_t trailing_gap = 0;
+
+        check_head_block(conf);
+
+        leading_gap = conf->off_in_head;
+
+        /* a gap at the end exists only if nothing follows the head */
+        if (!has_tail_block(conf) && !has_full_blocks(conf) &&
+            conf->off_in_tail != 0)
+                trailing_gap = get_atom_size(object) - conf->off_in_tail;
+
+        return get_atom_size(object) - (leading_gap + trailing_gap);
+}
+
+/* Number of payload bytes in the tail block: off_in_tail; @object is unused. */
+static uint32_t io_size_nopad_tail(struct avec_config *conf,
+                                   struct object_cipher_info *object)
+{
+        uint32_t tail_bytes;
+
+        check_tail_block(conf);
+        tail_bytes = conf->off_in_tail;
+
+        return tail_bytes;
+}
+
+/* Number of payload bytes in a full block: always the atom size. */
+static uint32_t io_size_nopad_full(struct avec_config *conf,
+                                   struct object_cipher_info *object)
+{
+        uint32_t atom_bytes;
+
+        check_full_block(conf);
+        atom_bytes = get_atom_size(object);
+
+        return atom_bytes;
+}
+
+/* atom->io_size_nopad() for the head atom of the data config. */
+static uint32_t io_size_nopad_data_head(call_frame_t *frame,
+                                        struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return io_size_nopad_head(data, object);
+}
+
+/* atom->io_size_nopad() for the head atom of the hole config. */
+static uint32_t io_size_nopad_hole_head(call_frame_t *frame,
+                                        struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return io_size_nopad_head(hole, object);
+}
+
+/* atom->io_size_nopad() for the tail atom of the data config. */
+static uint32_t io_size_nopad_data_tail(call_frame_t *frame,
+                                        struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return io_size_nopad_tail(data, object);
+}
+
+/* atom->io_size_nopad() for the tail atom of the hole config. */
+static uint32_t io_size_nopad_hole_tail(call_frame_t *frame,
+                                        struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return io_size_nopad_tail(hole, object);
+}
+
+/* atom->io_size_nopad() for the full blocks of the data config. */
+static uint32_t io_size_nopad_data_full(call_frame_t *frame,
+                                        struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return io_size_nopad_full(data, object);
+}
+
+/* atom->io_size_nopad() for the full blocks of the hole config. */
+static uint32_t io_size_nopad_hole_full(call_frame_t *frame,
+                                        struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return io_size_nopad_full(hole, object);
+}
+
+/* Offset inside the head block: the size of the gap at its beginning. */
+static uint32_t offset_in_head(struct avec_config *conf)
+{
+        uint32_t head_off;
+
+        check_cursor_head(conf);
+        head_off = conf->off_in_head;
+
+        return head_off;
+}
+
+/*
+ * atom->offset_in() for tail atoms (shared by the data and hole
+ * tables): always 0. Both arguments are unused.
+ */
+static uint32_t offset_in_tail(call_frame_t *frame,
+                               struct object_cipher_info *object)
+{
+        (void)frame;
+        (void)object;
+
+        return 0;
+}
+
+/*
+ * Byte offset of the block under the cursor, counted from the first
+ * full block. When a head block exists it occupies one cursor
+ * position, which is skipped.
+ */
+static uint32_t offset_in_full(struct avec_config *conf,
+                               struct object_cipher_info *object)
+{
+        check_cursor_full(conf);
+
+        return (has_head_block(conf) ? conf->cursor - 1 : conf->cursor)
+                << get_atom_bits(object);
+}
+
+/* atom->offset_in() for the head atom of the data config; @object is unused. */
+static uint32_t offset_in_data_head(call_frame_t *frame,
+                                    struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return offset_in_head(data);
+}
+
+/* atom->offset_in() for the head atom of the hole config; @object is unused. */
+static uint32_t offset_in_hole_head(call_frame_t *frame,
+                                    struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return offset_in_head(hole);
+}
+
+/* atom->offset_in() for the full blocks of the data config. */
+static uint32_t offset_in_data_full(call_frame_t *frame,
+                                    struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return offset_in_full(data, object);
+}
+
+/* atom->offset_in() for the full blocks of the hole config. */
+static uint32_t offset_in_hole_full(call_frame_t *frame,
+                                    struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return offset_in_full(hole, object);
+}
+
+/*
+ * atom->rmw()
+ */
+/*
+ * Read callback of the read-modify-write sequence for a partial
+ * (head or tail) block described by @atom.
+ *
+ * Pre-conditions:
+ * @vec contains plain text of the latest
+ * version.
+ *
+ * Bring the gaps of the @partial block up to date with
+ * this plain text, encrypt the whole block
+ * and write the result to disk.
+ *
+ * @op_ret, @op_errno: result of the preceding read. A negative
+ *                     @op_ret, or a read too short to fill a gap, is
+ *                     recorded in local->op_ret / local->op_errno and
+ *                     nothing is written.
+ * @vec, @count:       vectors holding the plain text that was read.
+ *
+ * Always returns 0.
+ */
+static int32_t rmw_partial_block(call_frame_t *frame,
+                                 void *cookie,
+                                 xlator_t *this,
+                                 int32_t op_ret,
+                                 int32_t op_errno,
+                                 struct iovec *vec,
+                                 int32_t count,
+                                 struct iatt *stbuf,
+                                 struct iobref *iobref,
+                                 struct rmw_atom *atom)
+{
+        size_t was_read = 0;
+        uint64_t file_size;
+        crypt_local_t *local = frame->local;
+        struct object_cipher_info *object = &local->info->cinfo;
+
+        struct iovec *partial = atom->get_iovec(frame, 0);
+        struct avec_config *conf = atom->get_config(frame);
+        end_writeback_handler_t end_writeback_partial_block;
+#if DEBUG_CRYPT
+        gf_boolean_t check_last_cblock = _gf_false;
+#endif
+        local->op_ret = op_ret;
+        local->op_errno = op_errno;
+
+        if (op_ret < 0)
+                goto exit;
+
+        file_size = local->cur_file_size;
+        was_read = op_ret;
+
+        if (atom->locality == HEAD_ATOM && conf->off_in_head) {
+                /*
+                 * head atom with a non-uptodate gap
+                 * at the beginning
+                 *
+                 * fill the gap with plain text of the
+                 * latest version. Convert a part of hole
+                 * (if any) to zeros.
+                 */
+                int32_t i;
+                int32_t copied = 0;
+                int32_t to_gap; /* amount of data needed to uptodate
+                                   the gap at the beginning */
+#if 0
+                int32_t hole = 0; /* The part of the hole which
+                                   * got in the head block */
+#endif /* 0 */
+                to_gap = conf->off_in_head;
+
+                if (was_read < to_gap) {
+                        if (file_size >
+                            offset_at_head(conf) + was_read) {
+                                /*
+                                 * It is impossible to uptodate
+                                 * head block: too few bytes have
+                                 * been read from disk, so that
+                                 * partial write is impossible.
+                                 *
+                                 * It could happen because of many
+                                 * reasons: IO errors, (meta)data
+                                 * corruption in the local file system,
+                                 * etc.
+                                 */
+                                gf_log(this->name, GF_LOG_WARNING,
+                                       "Can not uptodate a gap at the beginning");
+                                local->op_ret = -1;
+                                local->op_errno = EIO;
+                                goto exit;
+                        }
+#if 0
+                        hole = to_gap - was_read;
+#endif /* 0 */
+                        to_gap = was_read;
+                }
+                /*
+                 * uptodate the gap at the beginning
+                 */
+                for (i = 0; i < count && copied < to_gap; i++) {
+                        int32_t to_copy;
+
+                        to_copy = vec[i].iov_len;
+                        if (to_copy > to_gap - copied)
+                                to_copy = to_gap - copied;
+                        /*
+                         * Append after the bytes contributed by the
+                         * previous vectors; copying every vector to
+                         * the start of the block would overwrite them.
+                         */
+                        memcpy((char *)partial->iov_base + copied,
+                               vec[i].iov_base, to_copy);
+                        copied += to_copy;
+                }
+#if 0
+                /*
+                 * If possible, convert part of the
+                 * hole, which got in the head block
+                 */
+                ret = TRY_LOCK(&local->hole_lock);
+                if (!ret) {
+                        if (local->hole_handled)
+                                /*
+                                 * already converted by
+                                 * crypt_writev_cbk()
+                                 */
+                                UNLOCK(&local->hole_lock);
+                        else {
+                                /*
+                                 * convert the part of the hole
+                                 * which got in the head block
+                                 * to zeros.
+                                 *
+                                 * Update the orig_offset to make
+                                 * sure writev_cbk() won't care
+                                 * about this part of the hole.
+                                 *
+                                 */
+                                memset(partial->iov_base + to_gap, 0, hole);
+
+                                conf->orig_offset -= hole;
+                                conf->orig_size += hole;
+                                UNLOCK(&local->hole_lock);
+                        }
+                }
+                else /*
+                      * conversion is being performed
+                      * by crypt_writev_cbk()
+                      */
+                        ;
+#endif /* 0 */
+        }
+        if (atom->locality == TAIL_ATOM ||
+            (!has_tail_block(conf) && conf->off_in_tail)) {
+                /*
+                 * tail atom, or head atom with a non-uptodate
+                 * gap at the end.
+                 *
+                 * fill the gap at the end of the block
+                 * with plain text of the latest version.
+                 * Pad the result, (if needed)
+                 */
+                int32_t i;
+                int32_t to_gap;
+                int copied;
+                off_t off_in_tail;
+                int32_t to_copy;
+
+                off_in_tail = conf->off_in_tail;
+                to_gap = conf->gap_in_tail;
+
+                if (to_gap && was_read < off_in_tail + to_gap) {
+                        /*
+                         * It is impossible to uptodate
+                         * the gap at the end: too few bytes
+                         * have been read from disk, so that
+                         * partial write is impossible.
+                         *
+                         * It could happen because of many
+                         * reasons: IO errors, (meta)data
+                         * corruption in the local file system,
+                         * etc.
+                         */
+                        gf_log(this->name, GF_LOG_WARNING,
+                               "Can not uptodate a gap at the end");
+                        local->op_ret = -1;
+                        local->op_errno = EIO;
+                        goto exit;
+                }
+                /*
+                 * uptodate the gap at the end: walk the vectors
+                 * backwards and fill the gap from its end
+                 */
+                copied = 0;
+                to_copy = to_gap;
+                for(i = count - 1; i >= 0 && to_copy > 0; i--) {
+                        uint32_t from_vec, off_in_vec;
+
+                        off_in_vec = 0;
+                        from_vec = vec[i].iov_len;
+                        if (from_vec > to_copy) {
+                                off_in_vec = from_vec - to_copy;
+                                from_vec = to_copy;
+                        }
+                        memcpy(partial->iov_base +
+                               off_in_tail + to_gap - copied - from_vec,
+                               vec[i].iov_base + off_in_vec,
+                               from_vec);
+
+                        gf_log(this->name, GF_LOG_DEBUG,
+                               "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
+                               (int)from_vec,
+                               (int)off_in_tail + to_gap - copied - from_vec,
+                               (int)off_in_vec);
+
+                        copied += from_vec;
+                        to_copy -= from_vec;
+                }
+                partial->iov_len = off_in_tail + to_gap;
+
+                if (object_alg_should_pad(object)) {
+                        int32_t resid = 0;
+                        resid = partial->iov_len & (object_alg_blksize(object) - 1);
+                        if (resid) {
+                                /*
+                                 * append a new EOF padding
+                                 */
+                                local->eof_padding_size =
+                                        object_alg_blksize(object) - resid;
+
+                                gf_log(this->name, GF_LOG_DEBUG,
+                                       "set padding size %d",
+                                       local->eof_padding_size);
+
+                                /*
+                                 * NOTE(review): the padding bytes are
+                                 * set to 1 although the debug message
+                                 * below speaks of zeros -- confirm which
+                                 * one is intended before changing either.
+                                 */
+                                memset(partial->iov_base + partial->iov_len,
+                                       1,
+                                       local->eof_padding_size);
+                                partial->iov_len += local->eof_padding_size;
+#if DEBUG_CRYPT
+                                gf_log(this->name, GF_LOG_DEBUG,
+                                       "pad cblock with %d zeros:",
+                                       local->eof_padding_size);
+                                dump_cblock(this,
+                                            (unsigned char *)partial->iov_base +
+                                            partial->iov_len - object_alg_blksize(object));
+                                check_last_cblock = _gf_true;
+#endif
+                        }
+                }
+        }
+        /*
+         * encrypt the whole block
+         */
+        encrypt_aligned_iov(object,
+                            partial,
+                            1,
+                            atom->offset_at(frame, object));
+#if DEBUG_CRYPT
+        if (check_last_cblock == _gf_true) {
+                gf_log(this->name, GF_LOG_DEBUG,
+                       "encrypt last cblock with offset %llu",
+                       (unsigned long long)atom->offset_at(frame, object));
+                dump_cblock(this, (unsigned char *)partial->iov_base +
+                            partial->iov_len - object_alg_blksize(object));
+        }
+#endif
+        set_local_io_params_writev(frame, object, atom,
+                                   atom->offset_at(frame, object),
+                                   iovec_get_size(partial, 1));
+        /*
+         * Log before winding: the message reads the frame's state,
+         * and the frame should not be relied upon once the write
+         * has been wound (its callback may already have run).
+         */
+        gf_log("crypt", GF_LOG_DEBUG,
+               "submit partial block: %d bytes from %d offset",
+               (int)iovec_get_size(partial, 1),
+               (int)atom->offset_at(frame, object));
+        /*
+         * write the whole block to disk
+         */
+        end_writeback_partial_block = dispatch_end_writeback(local->fop);
+        conf->cursor ++;
+        STACK_WIND(frame,
+                   end_writeback_partial_block,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->writev,
+                   local->fd,
+                   partial,
+                   1,
+                   atom->offset_at(frame, object),
+                   local->flags,
+                   local->iobref_data,
+                   local->xdata);
+ exit:
+        return 0;
+}
+
+/*
+ * Perform a (read-)modify-write sequence.
+ * This should be performed only after approval
+ * of upper server-side manager, i.e. the caller
+ * needs to make sure this is his turn to rmw.
+ *
+ * @fd:    file to read from.
+ * @ltype: locality (head or tail) of the partial block to submit.
+ *
+ * Allocation and dict failures are only logged (see FIXMEs):
+ * nothing is wound and the caller is not notified.
+ */
+void submit_partial(call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    atom_locality_type ltype)
+{
+        int32_t ret;
+        dict_t *dict;
+        struct rmw_atom *atom;
+        crypt_local_t *local = frame->local;
+        struct object_cipher_info *object = &local->info->cinfo;
+
+        atom = atom_by_types(local->active_setup, ltype);
+        /*
+         * To perform the "read" component of the read-modify-write
+         * sequence the crypt translator does stack_wind to itself.
+         *
+         * Pass current file size to crypt_readv()
+         */
+        dict = dict_new();
+        if (!dict) {
+                /*
+                 * FIXME: Handle the error
+                 */
+                gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+                return;
+        }
+        ret = dict_set(dict,
+                       FSIZE_XATTR_PREFIX,
+                       data_from_uint64(local->cur_file_size));
+        if (ret) {
+                /*
+                 * FIXME: Handle the error
+                 *
+                 * The only reference to @dict is dropped at the exit
+                 * label; dropping it here as well would unref twice.
+                 */
+                gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+                goto exit;
+        }
+        STACK_WIND(frame,
+                   atom->rmw,
+                   this,
+                   this->fops->readv, /* crypt_readv */
+                   fd,
+                   atom->count_to_uptodate(frame, object), /* count */
+                   atom->offset_at(frame, object), /* offset to read from */
+                   0,
+                   dict);
+ exit:
+        dict_unref(dict);
+}
+
+/*
+ * submit blocks of FULL_ATOM type
+ *
+ * Encrypts the full blocks of the active setup and winds writev
+ * requests for them to the first child.
+ *
+ * In ordered mode exactly one block (the one under conf->cursor) is
+ * submitted per call. Otherwise all conf->nr_full_blocks blocks are
+ * submitted in groups of at most MAX_IOVEC blocks.
+ */
+void submit_full(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
+ uint32_t count; /* total number of full blocks to submit */
+ uint32_t granularity; /* number of blocks to submit in one iteration */
+
+ uint64_t off_in_file; /* start offset in the file, bytes */
+ uint32_t off_in_atom; /* start offset in the atom, blocks */
+ uint32_t blocks_written = 0; /* blocks written for this submit */
+
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_full_block;
+ /*
+ * Write full blocks by groups of granularity size.
+ */
+ end_writeback_full_block = dispatch_end_writeback(local->fop);
+
+ if (is_ordered_mode(frame)) {
+ uint32_t skip = has_head_block(conf) ? 1 : 0;
+ count = 1;
+ granularity = 1;
+ /*
+ * calculate start offset using cursor value;
+ * here we should take into account head block,
+ * which corresponds to cursor value 0.
+ */
+ off_in_file = atom->offset_at(frame, object) +
+ ((conf->cursor - skip) << get_atom_bits(object));
+ off_in_atom = conf->cursor - skip;
+ }
+ else {
+ /*
+ * in parallel mode
+ */
+ count = conf->nr_full_blocks;
+ granularity = MAX_IOVEC;
+ off_in_file = atom->offset_at(frame, object);
+ off_in_atom = 0;
+ }
+ while (count) {
+ /* size of this group: the remaining blocks, capped by granularity */
+ uint32_t blocks_to_write = count;
+
+ if (blocks_to_write > granularity)
+ blocks_to_write = granularity;
+ if (conf->type == HOLE_ATOM)
+ /*
+ * reset iovec before encryption
+ */
+ memset(atom->get_iovec(frame, 0)->iov_base,
+ 0,
+ get_atom_size(object));
+ /*
+ * encrypt the group
+ */
+ encrypt_aligned_iov(object,
+ atom->get_iovec(frame,
+ off_in_atom +
+ blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written <<
+ get_atom_bits(object)));
+
+ set_local_io_params_writev(frame, object, atom,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ blocks_to_write << get_atom_bits(object));
+
+ /* the cursor is advanced before the group is wound */
+ conf->cursor += blocks_to_write;
+
+ /* use iobref_data when it is set, otherwise fall back to iobref */
+ STACK_WIND(frame,
+ end_writeback_full_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ atom->get_iovec(frame, off_in_atom + blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ local->flags,
+ local->iobref_data ? local->iobref_data : local->iobref,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
+ blocks_to_write,
+ (int)(off_in_file + (blocks_written << get_atom_bits(object))));
+
+ count -= blocks_to_write;
+ blocks_written += blocks_to_write;
+ }
+ return;
+}
+
+/* atom->rmw() for the head atom of the data config; @xdata is unused. */
+static int32_t rmw_data_head(call_frame_t *frame, void *cookie,
+                             xlator_t *this,
+                             int32_t op_ret, int32_t op_errno,
+                             struct iovec *vec, int32_t count,
+                             struct iatt *stbuf, struct iobref *iobref,
+                             dict_t *xdata)
+{
+        struct rmw_atom *head = atom_by_types(DATA_ATOM, HEAD_ATOM);
+
+        return rmw_partial_block(frame, cookie, this, op_ret, op_errno,
+                                 vec, count, stbuf, iobref, head);
+}
+
+/* atom->rmw() for the tail atom of the data config; @xdata is unused. */
+static int32_t rmw_data_tail(call_frame_t *frame, void *cookie,
+                             xlator_t *this,
+                             int32_t op_ret, int32_t op_errno,
+                             struct iovec *vec, int32_t count,
+                             struct iatt *stbuf, struct iobref *iobref,
+                             dict_t *xdata)
+{
+        struct rmw_atom *tail = atom_by_types(DATA_ATOM, TAIL_ATOM);
+
+        return rmw_partial_block(frame, cookie, this, op_ret, op_errno,
+                                 vec, count, stbuf, iobref, tail);
+}
+
+/* atom->rmw() for the head atom of the hole config; @xdata is unused. */
+static int32_t rmw_hole_head(call_frame_t *frame, void *cookie,
+                             xlator_t *this,
+                             int32_t op_ret, int32_t op_errno,
+                             struct iovec *vec, int32_t count,
+                             struct iatt *stbuf, struct iobref *iobref,
+                             dict_t *xdata)
+{
+        struct rmw_atom *head = atom_by_types(HOLE_ATOM, HEAD_ATOM);
+
+        return rmw_partial_block(frame, cookie, this, op_ret, op_errno,
+                                 vec, count, stbuf, iobref, head);
+}
+
+/* atom->rmw() for the tail atom of the hole config; @xdata is unused. */
+static int32_t rmw_hole_tail(call_frame_t *frame, void *cookie,
+                             xlator_t *this,
+                             int32_t op_ret, int32_t op_errno,
+                             struct iovec *vec, int32_t count,
+                             struct iatt *stbuf, struct iobref *iobref,
+                             dict_t *xdata)
+{
+        struct rmw_atom *tail = atom_by_types(HOLE_ATOM, TAIL_ATOM);
+
+        return rmw_partial_block(frame, cookie, this, op_ret, op_errno,
+                                 vec, count, stbuf, iobref, tail);
+}
+
+/*
+ * atom->count_to_uptodate()
+ */
+/*
+ * Number of bytes to read in order to bring the head block up to date.
+ * Only the gap at the beginning is needed, unless the head is the only
+ * block (acount == 1) and off_in_tail is non-zero: then the whole atom
+ * is read.
+ */
+static uint32_t count_to_uptodate_head(struct avec_config *conf,
+                                       struct object_cipher_info *object)
+{
+        if (conf->acount != 1 || !conf->off_in_tail)
+                /* there is no need to read the whole head block */
+                return conf->off_in_head;
+
+        return get_atom_size(object);
+}
+
+/*
+ * Number of bytes to read in order to bring the tail block up to date:
+ * always the whole atom. @conf is unused.
+ */
+static uint32_t count_to_uptodate_tail(struct avec_config *conf,
+                                       struct object_cipher_info *object)
+{
+        uint32_t whole_atom = get_atom_size(object);
+
+        return whole_atom;
+}
+
+/* atom->count_to_uptodate() for the head atom of the data config. */
+static uint32_t count_to_uptodate_data_head(call_frame_t *frame,
+                                            struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return count_to_uptodate_head(data, object);
+}
+
+/* atom->count_to_uptodate() for the tail atom of the data config. */
+static uint32_t count_to_uptodate_data_tail(call_frame_t *frame,
+                                            struct object_cipher_info *object)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return count_to_uptodate_tail(data, object);
+}
+
+/* atom->count_to_uptodate() for the head atom of the hole config. */
+static uint32_t count_to_uptodate_hole_head(call_frame_t *frame,
+                                            struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return count_to_uptodate_head(hole, object);
+}
+
+/* atom->count_to_uptodate() for the tail atom of the hole config. */
+static uint32_t count_to_uptodate_hole_tail(call_frame_t *frame,
+                                            struct object_cipher_info *object)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return count_to_uptodate_tail(hole, object);
+}
+
+/* atom->get_config() */
+
+/* atom->get_config() for data atoms: the data_conf of the frame's local. */
+static struct avec_config *get_config_data(call_frame_t *frame)
+{
+        crypt_local_t *local = frame->local;
+
+        return &local->data_conf;
+}
+
+/* atom->get_config() for hole atoms: the hole_conf of the frame's local. */
+static struct avec_config *get_config_hole(call_frame_t *frame)
+{
+        crypt_local_t *local = frame->local;
+
+        return &local->hole_conf;
+}
+
+/*
+ * atom->get_iovec()
+ */
+/* Head vector of the hole config: the first entry of avec; @count is unused. */
+static struct iovec *get_iovec_hole_head(call_frame_t *frame,
+                                         uint32_t count)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return &hole->avec[0];
+}
+
+/*
+ * Full-block vector of the hole config: the entry right after the head
+ * vector when the head has a gap at the beginning, the first entry
+ * otherwise. @count is ignored.
+ */
+static struct iovec *get_iovec_hole_full(call_frame_t *frame,
+                                         uint32_t count)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return &hole->avec[hole->off_in_head ? 1 : 0];
+}
+
+/* Tail vector of the hole config: the last entry of the pool; @count is unused. */
+static inline struct iovec *get_iovec_hole_tail(call_frame_t *frame,
+                                                uint32_t count)
+{
+        struct avec_config *hole = get_hole_conf(frame);
+
+        return &hole->avec[hole->blocks_in_pool - 1];
+}
+
+/* Head vector of the data config: the first entry of avec; @count is unused. */
+static inline struct iovec *get_iovec_data_head(call_frame_t *frame,
+                                                uint32_t count)
+{
+        struct avec_config *data = get_data_conf(frame);
+
+        return &data->avec[0];
+}
+
+/*
+ * Vector of the @count-th full block of the data config. Full blocks
+ * follow the head vector when the head has a gap at the beginning.
+ */
+static inline struct iovec *get_iovec_data_full(call_frame_t *frame,
+                                                uint32_t count)
+{
+        struct avec_config *data = get_data_conf(frame);
+        struct iovec *first_full = data->avec + (data->off_in_head ? 1 : 0);
+
+        return first_full + count;
+}
+
+/*
+ * Tail vector of the data config: the entry after the (optional) head
+ * vector and all full-block vectors. @count is unused.
+ */
+static inline struct iovec *get_iovec_data_tail(call_frame_t *frame,
+                                                uint32_t count)
+{
+        struct avec_config *data = get_data_conf(frame);
+        struct iovec *first_full = data->avec + (data->off_in_head ? 1 : 0);
+
+        return first_full + data->nr_full_blocks;
+}
+
+/*
+ * Method tables of all atoms, indexed by data type (data or hole) and
+ * locality (head, tail or full). The FULL_ATOM entries define neither
+ * ->rmw nor ->count_to_uptodate, so those members stay NULL.
+ */
+static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
+        [DATA_ATOM][HEAD_ATOM] = {
+                .locality          = HEAD_ATOM,
+                .get_config        = get_config_data,
+                .get_iovec         = get_iovec_data_head,
+                .offset_at         = offset_at_data_head,
+                .offset_in         = offset_in_data_head,
+                .io_size_nopad     = io_size_nopad_data_head,
+                .count_to_uptodate = count_to_uptodate_data_head,
+                .rmw               = rmw_data_head
+        },
+        [DATA_ATOM][TAIL_ATOM] = {
+                .locality          = TAIL_ATOM,
+                .get_config        = get_config_data,
+                .get_iovec         = get_iovec_data_tail,
+                .offset_at         = offset_at_data_tail,
+                .offset_in         = offset_in_tail,
+                .io_size_nopad     = io_size_nopad_data_tail,
+                .count_to_uptodate = count_to_uptodate_data_tail,
+                .rmw               = rmw_data_tail
+        },
+        [DATA_ATOM][FULL_ATOM] = {
+                .locality          = FULL_ATOM,
+                .get_config        = get_config_data,
+                .get_iovec         = get_iovec_data_full,
+                .offset_at         = offset_at_data_full,
+                .offset_in         = offset_in_data_full,
+                .io_size_nopad     = io_size_nopad_data_full
+        },
+        [HOLE_ATOM][HEAD_ATOM] = {
+                .locality          = HEAD_ATOM,
+                .get_config        = get_config_hole,
+                .get_iovec         = get_iovec_hole_head,
+                .offset_at         = offset_at_hole_head,
+                .offset_in         = offset_in_hole_head,
+                .io_size_nopad     = io_size_nopad_hole_head,
+                .count_to_uptodate = count_to_uptodate_hole_head,
+                .rmw               = rmw_hole_head
+        },
+        [HOLE_ATOM][TAIL_ATOM] = {
+                .locality          = TAIL_ATOM,
+                .get_config        = get_config_hole,
+                .get_iovec         = get_iovec_hole_tail,
+                .offset_at         = offset_at_hole_tail,
+                .offset_in         = offset_in_tail,
+                .io_size_nopad     = io_size_nopad_hole_tail,
+                .count_to_uptodate = count_to_uptodate_hole_tail,
+                .rmw               = rmw_hole_tail
+        },
+        [HOLE_ATOM][FULL_ATOM] = {
+                .locality          = FULL_ATOM,
+                .get_config        = get_config_hole,
+                .get_iovec         = get_iovec_hole_full,
+                .offset_at         = offset_at_hole_full,
+                .offset_in         = offset_in_hole_full,
+                .io_size_nopad     = io_size_nopad_hole_full
+        }
+};
+
+/*
+ * Look up the method table of the atom with the given data type and
+ * locality. The indices are not range-checked.
+ */
+struct rmw_atom *atom_by_types(atom_data_type data,
+                               atom_locality_type locality)
+{
+        struct rmw_atom *row = atoms[data];
+
+        return row + locality;
+}
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
new file mode 100644
index 000000000..7c212ad5d
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-common.h
@@ -0,0 +1,141 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_COMMON_H__
+#define __CRYPT_COMMON_H__
+
+/* NOTE(review): presumably sentinels for "no valid value" -- confirm at the users */
+#define INVAL_SUBVERSION_NUMBER (0xff)
+#define CRYPT_INVAL_OP (GF_FOP_NULL)
+
+/*
+ * Key names in the "trusted.glusterfs.crypt." namespace.
+ * NOTE(review): the ".att." names look like attributes and the ".msg."
+ * names like messages passed in dictionaries -- confirm.
+ */
+#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
+#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
+#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
+#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
+#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
+#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
+#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
+
+
+/* messages for crypt_open() */
+#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
+#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
+
+#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
+
+/* empty statement */
+#define noop do {; } while (0)
+/*
+ * compile-time assertion: when @cond evaluates to 0 the two case
+ * labels collide and the compilation fails
+ */
+#define cassert(cond) ({ switch (-1) { case (cond): case 0: break; } })
+/* (y - 1) converted to the type of x; a bit mask when y is a power of two */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+/* round x up to a multiple of y; assumes y is a power of two */
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+/*
+ * Format of file's metadata
+ *
+ * A one-byte loader id followed by a version-specific payload. The
+ * struct is packed, so the payload starts right after the id.
+ */
+struct crypt_format {
+ uint8_t loader_id; /* version of metadata loader */
+ uint8_t versioned[0]; /* file's metadata of specific version */
+} __attribute__((packed));
+
+/*
+ * Cipher algorithm identifiers.
+ * NOTE(review): LAST_CIPHER_ALG presumably is an end marker (the number
+ * of algorithms), not an algorithm -- confirm.
+ */
+typedef enum {
+ AES_CIPHER_ALG,
+ LAST_CIPHER_ALG
+} cipher_alg_t;
+
+/*
+ * Cipher mode identifiers.
+ * NOTE(review): LAST_CIPHER_MODE presumably is an end marker (the number
+ * of modes), not a mode -- confirm.
+ */
+typedef enum {
+ XTS_CIPHER_MODE,
+ LAST_CIPHER_MODE
+} cipher_mode_t;
+
+/*
+ * Metadata loader identifiers.
+ * NOTE(review): presumably the values stored in crypt_format.loader_id;
+ * LAST_MTD_LOADER looks like an end marker -- confirm.
+ */
+typedef enum {
+ MTD_LOADER_V1,
+ LAST_MTD_LOADER
+} mtd_loader_id;
+
+/* Set the "take metadata read lock" request bit in @flags. */
+static inline void msgflags_set_mtd_rlock(uint32_t *flags)
+{
+        *flags = *flags | MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+/* Set the "take metadata write lock" request bit in @flags. */
+static inline void msgflags_set_mtd_wlock(uint32_t *flags)
+{
+        *flags = *flags | MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+/* Non-zero when the "take metadata read lock" request bit is set in @flags. */
+static inline gf_boolean_t msgflags_check_mtd_rlock(uint32_t *flags)
+{
+        uint32_t requested = *flags & MSGFLAGS_REQUEST_MTD_RLOCK;
+
+        return requested;
+}
+
+/*
+ * _gf_true when the "take metadata write lock" request bit is set in
+ * @flags, _gf_false otherwise.
+ *
+ * The masked value is normalised: the raw bit value (2) is neither
+ * _gf_false nor _gf_true and would fail a comparison with _gf_true.
+ */
+static inline gf_boolean_t msgflags_check_mtd_wlock(uint32_t *flags)
+{
+        return (*flags & MSGFLAGS_REQUEST_MTD_WLOCK) ? _gf_true : _gf_false;
+}
+
+/* True when either the read-lock or the write-lock request bit is set. */
+static inline gf_boolean_t msgflags_check_mtd_lock(uint32_t *flags)
+{
+        if (msgflags_check_mtd_rlock(flags))
+                return 1;
+
+        return msgflags_check_mtd_wlock(flags) ? 1 : 0;
+}
+
+/*
+ * Number of logical blocks of (1 << @blkbits) bytes which are touched,
+ * maybe only partially, by the @count bytes starting at offset @start.
+ * The range is expected to be non-empty (@count > 0).
+ */
+static inline off_t logical_blocks_occupied(uint64_t start, off_t count,
+                                            int blkbits)
+{
+        uint64_t last_byte = start + count - 1;
+        uint64_t first_blk = start >> blkbits;
+        uint64_t last_blk = last_byte >> blkbits;
+
+        return last_blk - first_blk + 1;
+}
+
+/*
+ * Non-zero when the bytes at offsets @off1 and @off2 belong to the
+ * same logical block of (1 << @blkbits) bytes, i.e. when the offsets
+ * differ only in their low @blkbits bits.
+ */
+static inline int in_same_lblock(uint64_t off1, uint64_t off2,
+                                 int blkbits)
+{
+        uint64_t differing_bits = off1 ^ off2;
+
+        return (differing_bits >> blkbits) == 0;
+}
+
+/*
+ * Log, at debug level, the first 16 bytes of @buf in hexadecimal.
+ * @buf must hold at least 16 bytes.
+ */
+static inline void dump_cblock(xlator_t *this, unsigned char *buf)
+{
+        gf_log(this->name, GF_LOG_DEBUG,
+               "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
+               buf[0], buf[1], buf[2], buf[3],
+               buf[4], buf[5], buf[6], buf[7],
+               buf[8], buf[9], buf[10], buf[11],
+               buf[12], buf[13], buf[14], buf[15]);
+}
+
+#endif /* __CRYPT_COMMON_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
new file mode 100644
index 000000000..799727573
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-mem-types.h
@@ -0,0 +1,43 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __CRYPT_MEM_TYPES_H__
+#define __CRYPT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/*
+ * Memory type identifiers of the crypt translator. The numbering
+ * starts right after gf_common_mt_end.
+ * NOTE(review): presumably used as the type tag for the translator's
+ * accounted allocations -- confirm against the allocation sites.
+ */
+enum gf_crypt_mem_types_ {
+ gf_crypt_mt_priv = gf_common_mt_end + 1,
+ gf_crypt_mt_inode,
+ gf_crypt_mt_data,
+ gf_crypt_mt_mtd,
+ gf_crypt_mt_loc,
+ gf_crypt_mt_iatt,
+ gf_crypt_mt_key,
+ gf_crypt_mt_iovec,
+ gf_crypt_mt_char,
+};
+
+#endif /* __CRYPT_MEM_TYPES_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
+
+
+
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
new file mode 100644
index 000000000..db2e6d83c
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.c
@@ -0,0 +1,4498 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <ctype.h>
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Forward declarations of file-local helpers that are referenced
+ * before their definitions.
+ */
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+static int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t from, off_t size);
+static int32_t load_file_size(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata);
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void put_one_call_open(call_frame_t *frame);
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this);
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this);
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
+static void free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
+static void free_avec_data(crypt_local_t *local);
+static void free_avec_hole(crypt_local_t *local);
+
+/*
+ * Take a zeroed crypt_local_t from this->local_pool, record @fop in it,
+ * initialise its three locks and attach it to @frame.
+ *
+ * Returns the new local, or NULL (after logging) when mem_get0() fails.
+ */
+static crypt_local_t *crypt_alloc_local(call_frame_t *frame, xlator_t *this,
+                                        glusterfs_fop_t fop)
+{
+        crypt_local_t *fresh = mem_get0(this->local_pool);
+
+        if (fresh == NULL) {
+                gf_log(this->name, GF_LOG_ERROR, "out of memory");
+                return NULL;
+        }
+
+        fresh->fop = fop;
+        LOCK_INIT(&fresh->hole_lock);
+        LOCK_INIT(&fresh->call_lock);
+        LOCK_INIT(&fresh->rw_count_lock);
+
+        frame->local = fresh;
+        return fresh;
+}
+
+/*
+ * Look up the crypt inode info stored in the context of @inode for
+ * translator @this.
+ *
+ * Returns the stored pointer, or NULL (with a warning logged) when the
+ * context lookup fails or the stored value is zero.
+ */
+struct crypt_inode_info *get_crypt_inode_info(inode_t *inode, xlator_t *this)
+{
+        uint64_t ctx_value = 0;
+        struct crypt_inode_info *found;
+
+        if (inode_ctx_get(inode, this, &ctx_value) == -1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Can not get inode info");
+                return NULL;
+        }
+
+        found = (struct crypt_inode_info *)(long)ctx_value;
+        if (!found)
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Can not obtain inode info");
+        return found;
+}
+
+/*
+ * Return the inode info cached in @local, fetching it from the inode
+ * of local->fd on first use. The result may be NULL.
+ */
+static struct crypt_inode_info *local_get_inode_info(crypt_local_t *local,
+                                                     xlator_t *this)
+{
+        if (!local->info)
+                local->info = get_crypt_inode_info(local->fd->inode, this);
+
+        return local->info;
+}
+
+/*
+ * Allocate a zeroed crypt_inode_info and cache it in @local->info.
+ * With DEBUG_CRYPT a private copy of @loc is attached as well.
+ *
+ * Returns the new info, or NULL on failure; every failure path sets
+ * local->op_ret = -1 and local->op_errno = ENOMEM.
+ */
+static struct crypt_inode_info *alloc_inode_info(crypt_local_t *local,
+                                                 loc_t *loc)
+{
+        struct crypt_inode_info *info;
+
+        /* GF_CALLOC returns zeroed memory; no additional memset needed */
+        info = GF_CALLOC(1, sizeof(*info), gf_crypt_mt_inode);
+        if (!info) {
+                gf_log ("crypt", GF_LOG_WARNING,
+                        "Can not allocate inode info");
+                goto failed;
+        }
+#if DEBUG_CRYPT
+        info->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+        if (!info->loc) {
+                gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
+                GF_FREE(info);
+                goto failed;
+        }
+        if (loc_copy(info->loc, loc)){
+                GF_FREE(info->loc);
+                GF_FREE(info);
+                goto failed;
+        }
+#endif /* DEBUG_CRYPT */
+
+        local->info = info;
+        return info;
+ failed:
+        /* report the failure consistently, whichever step failed */
+        local->op_ret = -1;
+        local->op_errno = ENOMEM;
+        return NULL;
+}
+
+/*
+ * Release an inode info object. The structure is wiped before it is
+ * freed. A NULL @info is accepted and ignored, because callers such as
+ * crypt_forget() pass a value taken from the inode context unchecked.
+ */
+static void free_inode_info(struct crypt_inode_info *info)
+{
+        if (!info)
+                return;
+#if DEBUG_CRYPT
+        loc_wipe(info->loc);
+        GF_FREE(info->loc);
+#endif
+        memset(info, 0, sizeof(*info));
+        GF_FREE(info);
+}
+
+/*
+ * forget handler: detach the crypt inode info from the context of
+ * @inode and release it. Always returns 0.
+ */
+int crypt_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t stored = 0;
+
+        if (inode_ctx_del (inode, this, &stored) != 0)
+                return 0;
+
+        free_inode_info((struct crypt_inode_info *)(long)stored);
+        return 0;
+}
+
+#if DEBUG_CRYPT
+/*
+ * Debug-only sanity checks on the result of a child readv.
+ * Logs at DEBUG level when:
+ *  - @read differs from the total size of @vec;
+ *  - padding is in effect and @read is not a multiple of the cipher
+ *    block size;
+ *  - the read runs past the file size in @stbuf rounded up to the
+ *    cipher block size.
+ * Nothing is checked when @read <= 0.
+ * NOTE(review): @stbuf is dereferenced before the @read <= 0 test, so
+ * it is presumably expected to be non-NULL even on failure - confirm.
+ */
+static void check_read(call_frame_t *frame, xlator_t *this, int32_t read,
+ struct iovec *vec, int32_t count, struct iatt *stbuf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = get_object_cinfo(local->info);
+ struct avec_config *conf = &local->data_conf;
+ /* bytes of the file size that spill into the last cipher block */
+ uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
+
+ if (read <= 0)
+ return;
+ if (read != iovec_get_size(vec, count))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "op_ret differs from amount of read bytes");
+
+ if (object_alg_should_pad(object) && (read & (object_alg_blksize(object) - 1)))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (!= 0 mod(cblock size))");
+
+ if (conf->aligned_offset + read >
+ stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (too large))");
+
+}
+
+#define PT_BYTES_TO_DUMP (32)
+/*
+ * Debug helper: log up to PT_BYTES_TO_DUMP leading bytes of the first
+ * component of @avec as a string. Does nothing when @avec is NULL.
+ */
+static void dump_plain_text(crypt_local_t *local, struct iovec *avec)
+{
+        int32_t to_dump;
+        char str[PT_BYTES_TO_DUMP + 1];
+
+        if (!avec)
+                return;
+        to_dump = avec->iov_len;
+        if (to_dump > PT_BYTES_TO_DUMP)
+                to_dump = PT_BYTES_TO_DUMP;
+        memcpy(str, avec->iov_base, to_dump);
+        /*
+         * Terminate with NUL; the character '0' would leave the buffer
+         * unterminated and let "%s" read past the end of str.
+         */
+        str[to_dump] = '\0';
+        gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
+}
+
+/*
+ * Data config invariant: the number of vector components equals the
+ * number of full blocks plus one for each of the head and tail blocks
+ * that is present. Returns non-zero when the invariant holds.
+ */
+static int32_t data_conf_invariant(struct avec_config *conf)
+{
+        int head = !!has_head_block(conf);
+        int tail = !!has_tail_block(conf);
+
+        return conf->acount == head + tail + conf->nr_full_blocks;
+}
+
+/*
+ * Hole config invariant: the pool holds one block for each kind of
+ * block present (head, tail, full). Returns non-zero when it holds.
+ */
+static int32_t hole_conf_invariant(struct avec_config *conf)
+{
+        int head = !!has_head_block(conf);
+        int tail = !!has_tail_block(conf);
+        int full = !!has_full_blocks(conf);
+
+        return conf->blocks_in_pool == head + tail + full;
+}
+
+/*
+ * Debug-only: verify the invariant that matches conf->type and log
+ * "bad <type> conf" at DEBUG level when it does not hold.
+ * An unrecognised type is always reported, since ret stays 0.
+ */
+static void crypt_check_conf(struct avec_config *conf)
+{
+ int32_t ret = 0;
+ const char *msg;
+
+ switch (conf->type) {
+ case DATA_ATOM:
+ msg = "data";
+ ret = data_conf_invariant(conf);
+ break;
+ case HOLE_ATOM:
+ msg = "hole";
+ ret = hole_conf_invariant(conf);
+ break;
+ default:
+ msg = "unknown";
+ }
+ if (!ret)
+ gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg);
+}
+
+/*
+ * Debug-only: check that buf->ia_size equals the file size tracked in
+ * the local, rounded up to the cipher block size, and log at DEBUG
+ * level when it does not.
+ *
+ * The tracked size is new_file_size for writes and cur_file_size for
+ * reads. Nothing is checked for ftruncate, nor for reads issued by the
+ * crypt translator itself.
+ */
+static void check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ uint64_t local_file_size;
+
+ switch(local->fop) {
+ case GF_FOP_FTRUNCATE:
+ return;
+ case GF_FOP_WRITE:
+ local_file_size = local->new_file_size;
+ break;
+ case GF_FOP_READ:
+ if (parent_is_crypt_xlator(frame, this))
+ return;
+ local_file_size = local->cur_file_size;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
+ return;
+ }
+ if (buf->ia_size != round_up(local_file_size,
+ object_alg_blksize(object)))
+ gf_log("crypt", GF_LOG_DEBUG,
+ "bad ia_size in buf (%llu), should be %llu",
+ (unsigned long long)buf->ia_size,
+ (unsigned long long)round_up(local_file_size,
+ object_alg_blksize(object)));
+}
+
+#else
+/*
+ * Without DEBUG_CRYPT the debug checks expand to `noop`, so the call
+ * sites need no conditional compilation of their own.
+ */
+#define check_read(frame, this, op_ret, vec, count, stbuf) noop
+#define dump_plain_text(local, avec) noop
+#define crypt_check_conf(conf) noop
+#define check_buf(frame, this, buf) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Callback of the child's readv.
+ *
+ * Pre-conditions:
+ * @vec represents a ciphertext of expanded size and
+ * aligned offset.
+ *
+ * Build a temporary vector @avec with block-aligned
+ * components, decrypt it and fix it up to represent a chunk
+ * of data corresponding to the original size and offset.
+ * The result is recorded in the local (op_ret, op_errno, iobref,
+ * buf, rw_count) and the call is completed via put_one_call_readv().
+ *
+ * Always returns 0.
+ */
+int32_t crypt_readv_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *avec;
+ uint32_t i;
+ uint32_t to_vec;
+ uint32_t to_user;
+
+ check_buf(frame, this, stbuf);
+ check_read(frame, this, op_ret, vec, count, stbuf);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->iobref = iobref_ref(iobref);
+
+ /* report the real (unpadded) file size to the caller */
+ local->buf = *stbuf;
+ local->buf.ia_size = local->cur_file_size;
+
+ /* failure or empty read: nothing to decrypt */
+ if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
+ goto put_one_call;
+
+ /* read starts at or beyond EOF: return 0 bytes */
+ if (conf->orig_offset >= local->cur_file_size) {
+ local->op_ret = 0;
+ goto put_one_call;
+ }
+ /*
+ * correct config params with real file size
+ * and actual amount of bytes read
+ */
+ set_config_offsets(frame, this,
+ conf->orig_offset, op_ret, DATA_ATOM, 0);
+
+ if (conf->orig_offset + conf->orig_size > local->cur_file_size)
+ conf->orig_size = local->cur_file_size - conf->orig_offset;
+ /*
+ * calculate amount of data to be returned
+ * to user.
+ */
+ to_user = op_ret;
+ if (conf->aligned_offset + to_user <= conf->orig_offset) {
+ gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ /*
+ * NOTE(review): the check above implies aligned_offset <= orig_offset,
+ * so (aligned_offset - orig_offset) is not positive; subtracting it
+ * looks like it enlarges to_user (later clamped to orig_size).
+ * Confirm the intended operand order against the field types.
+ */
+ to_user -= (conf->aligned_offset - conf->orig_offset);
+
+ if (to_user > conf->orig_size)
+ to_user = conf->orig_size;
+ local->rw_count = to_user;
+
+ op_errno = set_config_avec_data(this, local,
+ conf, object, vec, count);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto put_one_call;
+ }
+ avec = conf->avec;
+#if DEBUG_CRYPT
+ if (conf->off_in_tail != 0 &&
+ conf->off_in_tail < object_alg_blksize(object) &&
+ object_alg_should_pad(object))
+ gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
+ conf->off_in_tail);
+ if (iovec_get_size(vec, count) != 0 &&
+ in_same_lblock(conf->orig_offset + iovec_get_size(vec, count) - 1,
+ local->cur_file_size - 1,
+ object_alg_blkbits(object))) {
+ gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
+ dump_cblock(this,
+ (unsigned char *)(avec[conf->acount - 1].iov_base) +
+ avec[conf->acount - 1].iov_len - object_alg_blksize(object));
+ dump_cblock(this,
+ (unsigned char *)(vec[count - 1].iov_base) +
+ vec[count - 1].iov_len - object_alg_blksize(object));
+ }
+#endif
+ decrypt_aligned_iov(object, avec,
+ conf->acount, conf->aligned_offset);
+ /*
+ * pass proper plain data to user
+ * NOTE(review): same operand-order question as for to_user above.
+ */
+ avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
+ avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
+
+ /* trim the components so that they add up to to_user bytes */
+ to_vec = to_user;
+ for (i = 0; i < conf->acount; i++) {
+ if (avec[i].iov_len > to_vec)
+ avec[i].iov_len = to_vec;
+ to_vec -= avec[i].iov_len;
+ }
+ put_one_call:
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+/*
+ * Callback of the fgetxattr that fetches the regular file size.
+ *
+ * On success the size is stored in local->cur_file_size and the
+ * aligned read of expanded size is wound to the child with
+ * crypt_readv_cbk as callback. On failure, or when the size xattr is
+ * missing (EIO), the error is recorded in the local and the call is
+ * completed via put_one_call_readv(). Always returns 0.
+ */
+static int32_t do_readv(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->cur_file_size = data_to_uint64(data);
+
+ /* taken for the wound read; crypt_readv_cbk drops it */
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ local->flags,
+ local->xdata);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ /* take one call so that the put below completes the fop */
+ get_one_call(frame);
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+/*
+ * Callback of the finodelk issued by crypt_readv().
+ *
+ * On success, request the file-size xattr with do_readv as callback.
+ * On failure, drop the fd and xdata references held by the local and
+ * unwind readv with -1/@op_errno. Always returns 0.
+ */
+static int32_t crypt_readv_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size
+ */
+ STACK_WIND(frame,
+ do_readv,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ fd_unref(local->fd);
+ if (local->xdata)
+ dict_unref(local->xdata);
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ op_errno,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * Callback of the fstat issued for a zero-length read.
+ *
+ * On success, keep the stat in local->buf and request the file-size
+ * xattr with load_file_size as callback. On failure, log and unwind
+ * readv with the received op_ret/op_errno. Always returns 0.
+ *
+ * NOTE(review): the getxattr below is issued on local->loc, which the
+ * zero-length path of crypt_readv() does not appear to set - confirm
+ * where it gets initialised.
+ */
+static int32_t readv_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "stat failed (%d)", op_errno);
+ goto error;
+ }
+ local->buf = *buf;
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno,
+ NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * readv fop of the crypt translator.
+ *
+ * A zero-length read is completed through fstat. Otherwise the read
+ * window is configured and:
+ *  - when the caller is the crypt translator itself (the "read" part
+ *    of a read-modify-write or read-prune-write sequence), the file
+ *    size is taken from @xdata and the aligned read is wound directly,
+ *    since the lock is already held;
+ *  - otherwise a read lock on the whole file is requested first.
+ *
+ * On any set-up failure readv is unwound with -1 and an errno, and the
+ * fd reference taken here is dropped on every such path.
+ * Always returns 0.
+ */
+int32_t crypt_readv(call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    size_t size,
+                    off_t offset,
+                    uint32_t flags, dict_t *xdata)
+{
+        int32_t ret;
+        crypt_local_t *local;
+        struct crypt_inode_info *info;
+        struct gf_flock lock = {0, };
+
+#if DEBUG_CRYPT
+        gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
+               (int)size, (long long)offset);
+        if (parent_is_crypt_xlator(frame, this))
+                gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
+#endif
+        local = crypt_alloc_local(frame, this, GF_FOP_READ);
+        if (!local) {
+                ret = ENOMEM;
+                goto error;
+        }
+        if (size == 0)
+                goto trivial;
+
+        local->fd = fd_ref(fd);
+        local->flags = flags;
+
+        info = local_get_inode_info(local, this);
+        if (info == NULL) {
+                ret = EINVAL;
+                goto error;
+        }
+        if (!object_alg_atomic(&info->cinfo)) {
+                ret = EINVAL;
+                goto error;
+        }
+        set_config_offsets(frame, this, offset, size,
+                           DATA_ATOM, 0);
+        if (parent_is_crypt_xlator(frame, this)) {
+                data_t *data;
+                /*
+                 * We are called by crypt_writev (or crypt_ftruncate)
+                 * to perform the "read" component of the read-modify-write
+                 * (or read-prune-write) sequence for some atom;
+                 *
+                 * don't ask for access:
+                 * it has already been acquired
+                 *
+                 * Retrieve current file size
+                 */
+                if (!xdata) {
+                        gf_log("crypt", GF_LOG_WARNING,
+                               "Regular file size hasn't been passed");
+                        ret = EIO;
+                        goto error;
+                }
+                data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+                if (!data) {
+                        gf_log("crypt", GF_LOG_WARNING,
+                               "Regular file size not found");
+                        ret = EIO;
+                        goto error;
+                }
+                local->old_file_size =
+                        local->cur_file_size = data_to_uint64(data);
+
+                get_one_call(frame);
+                STACK_WIND(frame,
+                           crypt_readv_cbk,
+                           FIRST_CHILD(this),
+                           FIRST_CHILD(this)->fops->readv,
+                           local->fd,
+                           /*
+                            * FIXME: read amount can be reduced
+                            */
+                           local->data_conf.expanded_size,
+                           local->data_conf.aligned_offset,
+                           flags,
+                           NULL);
+                return 0;
+        }
+        if (xdata)
+                local->xdata = dict_ref(xdata);
+
+        /* l_start = 0 with l_len = 0: lock the whole file for reading */
+        lock.l_len = 0;
+        lock.l_start = 0;
+        lock.l_type = F_RDLCK;
+        lock.l_whence = SEEK_SET;
+
+        STACK_WIND(frame,
+                   crypt_readv_finodelk_cbk,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->finodelk,
+                   this->name,
+                   fd,
+                   F_SETLKW,
+                   &lock,
+                   NULL);
+        return 0;
+ trivial:
+        STACK_WIND(frame,
+                   readv_trivial_completion,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->fstat,
+                   fd,
+                   NULL);
+        return 0;
+ error:
+        /*
+         * Drop the fd reference in one place, so that no error exit
+         * (including the missing-size ones above) can leak it.
+         */
+        if (local && local->fd) {
+                fd_unref(local->fd);
+                local->fd = NULL;
+        }
+        STACK_UNWIND_STRICT(readv,
+                            frame,
+                            -1,
+                            ret,
+                            NULL,
+                            0,
+                            NULL,
+                            NULL,
+                            NULL);
+        return 0;
+}
+
+/*
+ * Record in the frame's local the parameters of one write iteration:
+ * the padded offset/size (@io_offset, @io_size), the unpadded
+ * offset/size obtained from @atom's callbacks, and the resulting file
+ * size. update_disk_file_size is set only when the unpadded write
+ * extends past the current file size.
+ */
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom,
+ off_t io_offset,
+ uint32_t io_size)
+{
+ crypt_local_t *local = frame->local;
+
+ local->io_offset = io_offset;
+ local->io_size = io_size;
+
+ local->io_offset_nopad =
+ atom->offset_at(frame, object) + atom->offset_in(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad offset to %llu",
+ (unsigned long long)local->io_offset_nopad);
+
+ local->io_size_nopad = atom->io_size_nopad(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad size to %llu",
+ (unsigned long long)local->io_size_nopad);
+
+ local->update_disk_file_size = 0;
+ /*
+ * NOTE: eof_padding_size is 0 for all full atoms;
+ * For head and tail atoms it will be set up at rmw_partial block()
+ */
+ local->new_file_size = local->cur_file_size;
+
+ /* the write grows the file: remember the new size */
+ if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) {
+
+ local->new_file_size = local->io_offset_nopad + local->io_size_nopad;
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set new file size to %llu",
+ (unsigned long long)local->new_file_size);
+
+ local->update_disk_file_size = 1;
+ }
+}
+
+/*
+ * Set up eof_padding_size, new_file_size and update_disk_file_size in
+ * the frame's local for a truncate to data_conf.orig_offset, depending
+ * on whether that offset is aligned to the cipher block size.
+ */
+void set_local_io_params_ftruncate(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ uint32_t resid;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ /* offset of the truncate point inside its cipher block */
+ resid = conf->orig_offset & (object_alg_blksize(object) - 1);
+ if (resid) {
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+ local->new_file_size = conf->aligned_offset;
+ local->update_disk_file_size = 0;
+ /*
+ * file size will be updated
+ * in the ->writev() stack,
+ * when submitting file tail
+ */
+ }
+ else {
+ local->eof_padding_size = 0;
+ local->new_file_size = conf->orig_offset;
+ local->update_disk_file_size = 1;
+ /*
+ * file size will be updated
+ * in this ->ftruncate stack
+ */
+ }
+}
+
+/* Submit the head (partial) atom of the active config on local->fd. */
+static inline void submit_head(call_frame_t *frame, xlator_t *this)
+{
+        crypt_local_t *ctx = frame->local;
+
+        submit_partial(frame, this, ctx->fd, HEAD_ATOM);
+}
+
+/* Submit the tail (partial) atom of the active config on local->fd. */
+static inline void submit_tail(call_frame_t *frame, xlator_t *this)
+{
+        crypt_local_t *ctx = frame->local;
+
+        submit_partial(frame, this, ctx->fd, TAIL_ATOM);
+}
+
+/*
+ * Submit the next piece of a hole. Hole conversion is always an
+ * appending write, hence it is always submitted in ordered fashion.
+ */
+static void submit_hole(call_frame_t *frame, xlator_t *this)
+{
+        do_ordered_submit(frame, this, HOLE_ATOM);
+}
+
+/*
+ * Submit data atoms. In ordered mode one step is submitted; otherwise
+ * a warning is logged, calls for all data atoms are taken and the
+ * atoms are submitted in parallel.
+ */
+static void submit_data(call_frame_t *frame, xlator_t *this)
+{
+        if (!is_ordered_mode(frame)) {
+                gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
+                get_nr_calls(frame, nr_calls_data(frame));
+                do_parallel_submit(frame, this, DATA_ATOM);
+                return;
+        }
+        do_ordered_submit(frame, this, DATA_ATOM);
+}
+
+/*
+ * helpers called by writev_cbk, ftruncate_cbk in ordered mode
+ */
+
+/* Non-zero when a hole vector has been set up in @local. */
+static inline int32_t should_submit_hole(crypt_local_t *local)
+{
+        return local->hole_conf.avec != NULL;
+}
+
+/*
+ * Non-zero while hole components remain to be submitted.
+ *
+ * For a write whose hole has a tail block, the last component is not
+ * submitted as a hole: that part fits into a data block and is
+ * converted as a zero-filled gap in the data head block.
+ */
+static inline int32_t should_resume_submit_hole(crypt_local_t *local)
+{
+        struct avec_config *hole = &local->hole_conf;
+
+        if (local->fop != GF_FOP_WRITE || !has_tail_block(hole))
+                return hole->cursor < hole->acount;
+
+        return hole->cursor < hole->acount - 1;
+}
+
+/*
+ * Non-zero when, in ordered mode, data components remain to be
+ * submitted. Parallel writes never resume: the result is 0.
+ */
+static inline int32_t should_resume_submit_data(call_frame_t *frame)
+{
+        crypt_local_t *local = frame->local;
+        struct avec_config *data = &local->data_conf;
+
+        if (!is_ordered_mode(frame))
+                return 0;
+
+        return data->cursor < data->acount;
+}
+
+/* Non-zero when a data vector is set up, i.e. data follows the hole. */
+static inline int32_t should_submit_data_after_hole(crypt_local_t *local)
+{
+        struct avec_config *data = &local->data_conf;
+
+        return data->avec != NULL;
+}
+
+/*
+ * Store @prebuf/@postbuf in the local, overriding their sizes with
+ * the tracked current/new file size, and then advance the current
+ * file size to the new one.
+ */
+static void update_local_file_params(call_frame_t *frame,
+                                     xlator_t *this,
+                                     struct iatt *prebuf,
+                                     struct iatt *postbuf)
+{
+        crypt_local_t *ctx = frame->local;
+
+        check_buf(frame, this, postbuf);
+
+        ctx->prebuf = *prebuf;
+        ctx->prebuf.ia_size = ctx->cur_file_size;
+
+        ctx->postbuf = *postbuf;
+        ctx->postbuf.ia_size = ctx->new_file_size;
+
+        ctx->cur_file_size = ctx->new_file_size;
+}
+
+/*
+ * Callback of one write iteration issued on behalf of writev
+ * (also used under the name crypt_writev_cbk).
+ *
+ * Records the result in the local. After a complete iteration it
+ * updates the tracked file sizes and either accumulates the written
+ * amount in rw_count (data write) and resumes data submission, or, for
+ * hole conversion, resumes the hole or switches over to the data.
+ * Finally the call taken for this iteration is dropped.
+ * Always returns 0.
+ */
+static int32_t end_writeback_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret <= 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "writev iteration failed");
+ goto put_one_call;
+ }
+ /*
+ * op_ret includes paddings (atom's head, atom's tail and EOF)
+ */
+ if (op_ret < local->io_size) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Incomplete writev iteration");
+ goto put_one_call;
+ }
+ /* do not count the EOF padding as written user data */
+ op_ret -= local->eof_padding_size;
+ local->op_ret = op_ret;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local)) {
+
+ LOCK(&local->rw_count_lock);
+ local->rw_count += op_ret;
+ UNLOCK(&local->rw_count_lock);
+
+ if (should_resume_submit_data(frame))
+ submit_data(frame, this);
+ }
+ else {
+ /*
+ * hole conversion is going on;
+ * don't take into account written zeros
+ */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+
+ else if (should_submit_data_after_hole(local))
+ submit_data(frame, this);
+ }
+ put_one_call:
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+#define crypt_writev_cbk end_writeback_writev
+
+#define HOLE_WRITE_CHUNK_BITS 12
+#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
+
+/*
+ * Convert hole of size @size at offset @off to
+ * zeros and prepare respective iovecs for submit.
+ * The hole lock should be held.
+ *
+ * Pre-conditions:
+ * @local->file_size is set and valid.
+ *
+ * Returns the result of set_config_avec_hole(); callers treat a
+ * non-zero value as an errno.
+ */
+int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t off, off_t size)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
+
+ ret = set_config_avec_hole(this, local,
+ &local->hole_conf, object, local->fop);
+ /* debug-only consistency check of the resulting config */
+ crypt_check_conf(&local->hole_conf);
+
+ return ret;
+}
+
+/*
+ * Prepare for submitting @size bytes of data at offset @from:
+ * set up the data config offsets and build its block-aligned vector
+ * from the @vec_count components of @vec. @setup_gap is passed on to
+ * set_config_offsets().
+ *
+ * Returns the result of set_config_avec_data(); callers treat a
+ * non-zero value as an errno.
+ */
+int32_t prepare_for_submit_data(call_frame_t *frame, xlator_t *this,
+                                off_t from, int32_t size, struct iovec *vec,
+                                int32_t vec_count, int32_t setup_gap)
+{
+        /* signed, to match the return type and prepare_for_submit_hole() */
+        int32_t ret;
+        crypt_local_t *local = frame->local;
+        struct object_cipher_info *object = &local->info->cinfo;
+
+        set_config_offsets(frame, this, from, size,
+                           DATA_ATOM, setup_gap);
+
+        ret = set_config_avec_data(this, local,
+                                   &local->data_conf, object, vec, vec_count);
+        /* debug-only consistency check of the resulting config */
+        crypt_check_conf(&local->data_conf);
+
+        return ret;
+}
+
+/*
+ * Free the vector @avec and the pointer array @pool. Nothing is freed
+ * when @avec is NULL. @blocks_in_pool is not used: the blocks the pool
+ * entries point to are not released here.
+ */
+static void free_avec(struct iovec *avec,
+                      char **pool, int blocks_in_pool)
+{
+        if (avec != NULL) {
+                GF_FREE(pool);
+                GF_FREE(avec);
+        }
+}
+
+/* Free the vector and pool array of the data config of @local. */
+static void free_avec_data(crypt_local_t *local)
+{
+        /* no "return expr;" here: not allowed by ISO C in a void function */
+        free_avec(local->data_conf.avec,
+                  local->data_conf.pool,
+                  local->data_conf.blocks_in_pool);
+}
+
+/* Free the vector and pool array of the hole config of @local. */
+static void free_avec_hole(crypt_local_t *local)
+{
+        /* no "return expr;" here: not allowed by ISO C in a void function */
+        free_avec(local->hole_conf.avec,
+                  local->hole_conf.pool,
+                  local->hole_conf.blocks_in_pool);
+}
+
+
+/*
+ * Mark @dtype as the active setup and submit every atom kind present
+ * in its config - head, full blocks, tail - one after another.
+ */
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+                               atom_data_type dtype)
+{
+        crypt_local_t *ctx = frame->local;
+        struct avec_config *cfg;
+
+        ctx->active_setup = dtype;
+        cfg = conf_by_type(frame, dtype);
+
+        if (has_head_block(cfg)) {
+                submit_head(frame, this);
+        }
+        if (has_full_blocks(cfg)) {
+                submit_full(frame, this);
+        }
+        if (has_tail_block(cfg)) {
+                submit_tail(frame, this);
+        }
+}
+
+/*
+ * Mark @dtype as the active setup and submit exactly one step of its
+ * config: the head block, else the full blocks, else the tail block.
+ * A call is taken for the submitted step. When nothing is due, this
+ * is only logged at DEBUG level.
+ */
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+                              atom_data_type dtype)
+{
+        crypt_local_t *ctx = frame->local;
+        struct avec_config *cfg;
+
+        ctx->active_setup = dtype;
+        cfg = conf_by_type(frame, dtype);
+
+        if (should_submit_head_block(cfg)) {
+                get_one_call_nolock(frame);
+                submit_head(frame, this);
+                return;
+        }
+        if (should_submit_full_block(cfg)) {
+                get_one_call_nolock(frame);
+                submit_full(frame, this);
+                return;
+        }
+        if (should_submit_tail_block(cfg)) {
+                get_one_call_nolock(frame);
+                submit_tail(frame, this);
+                return;
+        }
+        gf_log("crypt", GF_LOG_DEBUG,
+               "nothing has been submitted in ordered mode");
+}
+
+/*
+ * Callback of the fgetxattr that fetches the regular file size for a
+ * write.
+ *
+ * On success the size is stored in the local; if the write starts
+ * beyond EOF a hole config is prepared and the hole is submitted
+ * first, otherwise the data is submitted.
+ *
+ * Every failure (failed fgetxattr, missing size xattr, failed hole
+ * set-up) is recorded in local->op_ret/op_errno before the call is
+ * completed, so that it is not masked by the result of the preceding
+ * finodelk. Always returns 0.
+ */
+static int32_t do_writev(call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno,
+                         dict_t *dict,
+                         dict_t *xdata)
+{
+        data_t *data;
+        crypt_local_t *local = frame->local;
+        struct object_cipher_info *object = &local->info->cinfo;
+
+        if (op_ret < 0)
+                goto error;
+        /*
+         * extract regular file size
+         */
+        data = dict_get(dict, FSIZE_XATTR_PREFIX);
+        if (!data) {
+                gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+                op_errno = EIO;
+                goto error;
+        }
+        local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+        set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
+
+        if (local->cur_file_size < local->data_conf.orig_offset) {
+                /*
+                 * Set up hole config
+                 */
+                op_errno = prepare_for_submit_hole(frame,
+                                this,
+                                local->cur_file_size,
+                                local->data_conf.orig_offset - local->cur_file_size);
+                if (op_errno)
+                        goto error;
+        }
+        if (should_submit_hole(local))
+                submit_hole(frame, this);
+        else
+                submit_data(frame, this);
+        return 0;
+ error:
+        /* the parameters are by-value: store the error in the local */
+        local->op_ret = -1;
+        local->op_errno = op_errno;
+
+        get_one_call_nolock(frame);
+        put_one_call_writev(frame, this);
+        return 0;
+}
+
+/*
+ * Callback of the finodelk issued by crypt_writev().
+ *
+ * Records the result in the local. On success, request the file-size
+ * xattr with do_writev as callback; on failure, complete the call via
+ * put_one_call_writev(). Always returns 0.
+ */
+static int32_t crypt_writev_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_writev,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ /* take one call so that the put below completes the fop */
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+/*
+ * Callback of the fstat issued for a zero-length write.
+ *
+ * Records the result in the local, uses @buf (when the stat succeeded)
+ * as both pre- and post-operation attributes with the size replaced by
+ * the tracked file size, and completes the call. Always returns 0.
+ */
+static int32_t writev_trivial_completion(call_frame_t *frame,
+                                         void *cookie,
+                                         xlator_t *this,
+                                         int32_t op_ret,
+                                         int32_t op_errno,
+                                         struct iatt *buf,
+                                         dict_t *dict)
+{
+        crypt_local_t *local = frame->local;
+
+        local->op_ret = op_ret;
+        local->op_errno = op_errno;
+
+        /* a failed fstat may deliver no iatt at all: don't touch it */
+        if (op_ret >= 0 && buf) {
+                local->prebuf = *buf;
+                local->postbuf = *buf;
+        }
+        local->prebuf.ia_size = local->cur_file_size;
+        local->postbuf.ia_size = local->cur_file_size;
+
+        get_one_call(frame);
+        put_one_call_writev(frame, this);
+        return 0;
+}
+
+/*
+ * writev fop of the crypt translator.
+ *
+ * A zero-length write is completed through fstat. Otherwise the data
+ * config is prepared and:
+ *  - when the caller is the crypt translator itself (shrinking
+ *    crypt_ftruncate()), the file size is taken from @xdata and the
+ *    data is submitted right away, since the lock is already held;
+ *  - otherwise a write lock on the whole file is requested first.
+ *
+ * On any set-up failure the references taken here are dropped and
+ * writev is unwound with -1 and an errno. The inode info is NOT freed
+ * on that path: it is obtained from the inode context, which still
+ * refers to it, and it is released by crypt_forget().
+ * Always returns 0.
+ */
+int crypt_writev(call_frame_t *frame,
+                 xlator_t *this,
+                 fd_t *fd,
+                 struct iovec *vec,
+                 int32_t count,
+                 off_t offset,
+                 uint32_t flags,
+                 struct iobref *iobref,
+                 dict_t *xdata)
+{
+        int32_t ret;
+        crypt_local_t *local;
+        struct crypt_inode_info *info;
+        struct gf_flock lock = {0, };
+#if DEBUG_CRYPT
+        gf_log ("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
+                (int)iovec_get_size(vec, count), (long long)offset);
+#endif
+        local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
+        if (!local) {
+                ret = ENOMEM;
+                goto error;
+        }
+        local->fd = fd_ref(fd);
+
+        if (iobref)
+                local->iobref = iobref_ref(iobref);
+        /*
+         * to update real file size on the server
+         */
+        local->xattr = dict_new();
+        if (!local->xattr) {
+                ret = ENOMEM;
+                goto error;
+        }
+        local->flags = flags;
+
+        info = local_get_inode_info(local, this);
+        if (info == NULL) {
+                ret = EINVAL;
+                goto error;
+        }
+        if (!object_alg_atomic(&info->cinfo)) {
+                ret = EINVAL;
+                goto error;
+        }
+        if (iovec_get_size(vec, count) == 0)
+                goto trivial;
+
+        ret = prepare_for_submit_data(frame, this, offset,
+                                      iovec_get_size(vec, count),
+                                      vec, count, 0 /* don't setup gap
+                                                       in tail: we don't
+                                                       know file size yet */);
+        if (ret)
+                goto error;
+
+        if (parent_is_crypt_xlator(frame, this)) {
+                data_t *data;
+                /*
+                 * we are called by shrinking crypt_ftruncate(),
+                 * which doesn't perform hole conversion;
+                 *
+                 * don't ask for access:
+                 * it has already been acquired
+                 */
+
+                /*
+                 * extract file size
+                 */
+                if (!xdata) {
+                        gf_log("crypt", GF_LOG_WARNING,
+                               "Regular file size hasn't been passed");
+                        ret = EIO;
+                        goto error;
+                }
+                data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+                if (!data) {
+                        gf_log("crypt", GF_LOG_WARNING,
+                               "Regular file size not found");
+                        ret = EIO;
+                        goto error;
+                }
+                local->old_file_size =
+                        local->cur_file_size = data_to_uint64(data);
+
+                submit_data(frame, this);
+                return 0;
+        }
+        if (xdata)
+                local->xdata = dict_ref(xdata);
+        /*
+         * lock the file and retrieve its size
+         */
+        lock.l_len = 0;
+        lock.l_start = 0;
+        lock.l_type = F_WRLCK;
+        lock.l_whence = SEEK_SET;
+
+        STACK_WIND(frame,
+                   crypt_writev_finodelk_cbk,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->finodelk,
+                   this->name,
+                   fd,
+                   F_SETLKW,
+                   &lock,
+                   NULL);
+        return 0;
+ trivial:
+        STACK_WIND(frame,
+                   writev_trivial_completion,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->fstat,
+                   fd,
+                   NULL);
+        return 0;
+ error:
+        if (local) {
+                if (local->fd) {
+                        fd_unref(local->fd);
+                        local->fd = NULL;
+                }
+                if (local->iobref) {
+                        iobref_unref(local->iobref);
+                        local->iobref = NULL;
+                }
+                if (local->xdata) {
+                        dict_unref(local->xdata);
+                        local->xdata = NULL;
+                }
+                if (local->xattr) {
+                        dict_unref(local->xattr);
+                        local->xattr = NULL;
+                }
+                /*
+                 * local->info is only borrowed from the inode context;
+                 * drop the pointer, but leave the object alone.
+                 */
+                local->info = NULL;
+        }
+
+        STACK_UNWIND_STRICT(writev, frame, -1, ret, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Set up the data config for pruning the file at @offset: a zero-sized
+ * data window at that offset. No gap is set up in the tail, because
+ * pruning leaves no gap there to bring up to date. Always returns 0.
+ */
+int32_t prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+        set_config_offsets(frame, this, offset, 0 /* count */,
+                           DATA_ATOM, 0 /* no gap in tail */);
+        return 0;
+}
+
+/*
+ * Finish the read-prune-write sequence
+ *
+ * Can be invoked as
+ * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
+ * 2) ->writev_cbk() for non-cblock-aligned prune
+ *
+ * Records the result in the local, updates the tracked file sizes on
+ * success and completes the ftruncate call. Always returns 0.
+ */
+
+static int32_t prune_complete(call_frame_t *frame,
+                              void *cookie,
+                              xlator_t *this,
+                              int32_t op_ret,
+                              int32_t op_errno,
+                              struct iatt *prebuf,
+                              struct iatt *postbuf,
+                              dict_t *xdata)
+{
+        crypt_local_t *local = frame->local;
+
+        local->op_ret = op_ret;
+        local->op_errno = op_errno;
+
+        /*
+         * On failure the iatts are not valid (crypt_writev() unwinds
+         * errors with NULL prebuf/postbuf), so they must not be read.
+         */
+        if (op_ret >= 0)
+                update_local_file_params(frame, this, prebuf, postbuf);
+
+        put_one_call_ftruncate(frame, this);
+        return 0;
+}
+
+/*
+ * This is called as ->ftruncate_cbk()
+ *
+ * Perform the "write" component of the
+ * read-prune-write sequence.
+ *
+ * Submit the rest of the file: the partial block saved in local->vec
+ * is written back through crypt_writev(), with the current file size
+ * passed along in a dict.
+ *
+ * A failed ftruncate, or a failure to set up the write, is recorded in
+ * local->op_ret/op_errno before the call is completed.
+ * Always returns 0.
+ */
+static int32_t prune_submit_file_tail(call_frame_t *frame,
+                                      void *cookie,
+                                      xlator_t *this,
+                                      int32_t op_ret,
+                                      int32_t op_errno,
+                                      struct iatt *prebuf,
+                                      struct iatt *postbuf,
+                                      dict_t *xdata)
+{
+        crypt_local_t *local = frame->local;
+        struct avec_config *conf = &local->data_conf;
+        dict_t *dict;
+
+        if (op_ret < 0)
+                /* don't let the earlier read result mask this failure */
+                goto error;
+
+        if (local->xdata) {
+                dict_unref(local->xdata);
+                local->xdata = NULL;
+        }
+        if (xdata)
+                local->xdata = dict_ref(xdata);
+
+        dict = dict_new();
+        if (!dict) {
+                op_errno = ENOMEM;
+                goto error;
+        }
+
+        update_local_file_params(frame, this, prebuf, postbuf);
+        local->new_file_size = conf->orig_offset;
+
+        /*
+         * The rest of the file is a partial block and, hence,
+         * should be written via RMW sequence, so the crypt xlator
+         * does STACK_WIND to itself.
+         *
+         * Pass current file size to crypt_writev()
+         */
+        op_errno = dict_set(dict,
+                            FSIZE_XATTR_PREFIX,
+                            data_from_uint64(local->cur_file_size));
+        if (op_errno) {
+                gf_log("crypt", GF_LOG_WARNING,
+                       "can not set key to update file size");
+                dict_unref(dict);
+                goto error;
+        }
+        gf_log("crypt", GF_LOG_DEBUG,
+               "passing current file size (%llu) to crypt_writev",
+               (unsigned long long)local->cur_file_size);
+        /*
+         * Padding will be filled with
+         * zeros by rmw_partial_block()
+         */
+        STACK_WIND(frame,
+                   prune_complete,
+                   this,
+                   this->fops->writev, /* crypt_writev */
+                   local->fd,
+                   &local->vec,
+                   1,
+                   conf->aligned_offset, /* offset to write from */
+                   0,
+                   local->iobref,
+                   dict);
+
+        dict_unref(dict);
+        return 0;
+ error:
+        local->op_ret = -1;
+        local->op_errno = op_errno;
+
+        put_one_call_ftruncate(frame, this);
+        return 0;
+}
+
+/*
+ * This is called as a callback of ->writev() invoked on behalf
+ * of ftruncate(): it can be
+ * 1) ordered writes issued by hole conversion in the case of
+ * expanded truncate, or
+ * 2) an rmw partial data block issued by non-cblock-aligned
+ * prune.
+ *
+ * Records the result in the local, updates the tracked file sizes on
+ * success, resumes hole submission when more of the hole is pending,
+ * and drops the call taken for this write. Always returns 0.
+ */
+int32_t end_writeback_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /*
+ * if nothing has been written,
+ * then it must be an error
+ */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local))
+ /* case (2) */
+ goto put_one_call;
+ /* case (1) */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+ /*
+ * case of hole, when we shouldn't resume
+ */
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform the prune and write components of the
+ * read-prune-write sequence.
+ *
+ * Called as ->readv_cbk()
+ *
+ * Pre-conditions:
+ * @vec contains the latest atom of the file
+ * (plain text)
+ *
+ * The first conf->off_in_head bytes of @vec are copied to a freshly
+ * allocated local->vec, then ftruncate at conf->aligned_offset is wound
+ * to the child with prune_submit_file_tail() as the callback.
+ * On any failure the error is stored in local->op_ret/op_errno and
+ * the call is put via put_one_call_ftruncate().
+ */
+static int32_t prune_write(call_frame_t *frame,
+                           void *cookie,
+                           xlator_t *this,
+                           int32_t op_ret,
+                           int32_t op_errno,
+                           struct iovec *vec,
+                           int32_t count,
+                           struct iatt *stbuf,
+                           struct iobref *iobref,
+                           dict_t *xdata)
+{
+    int32_t i;
+    size_t to_copy;
+    size_t copied = 0;
+    crypt_local_t *local = frame->local;
+    struct avec_config *conf = &local->data_conf;
+
+    local->op_ret = op_ret;
+    local->op_errno = op_errno;
+    if (op_ret == -1)
+        goto put_one_call;
+
+    /*
+     * At first, uptodate head block
+     */
+    if (iovec_get_size(vec, count) < conf->off_in_head) {
+        gf_log(this->name, GF_LOG_WARNING,
+               "Failed to uptodate head block for prune");
+        local->op_ret = -1;
+        local->op_errno = EIO;
+        goto put_one_call;
+    }
+    local->vec.iov_len = conf->off_in_head;
+    local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len,
+                                    gf_crypt_mt_data);
+
+    if (local->vec.iov_base == NULL) {
+        local->op_ret = -1;
+        local->op_errno = ENOMEM;
+        /* no destination buffer: must not fall through to memcpy() */
+        goto put_one_call;
+    }
+    /* gather at most local->vec.iov_len bytes from the read vectors */
+    for (i = 0; i < count; i++) {
+        to_copy = vec[i].iov_len;
+        if (to_copy > local->vec.iov_len - copied)
+            to_copy = local->vec.iov_len - copied;
+
+        memcpy((char *)local->vec.iov_base + copied,
+               vec[i].iov_base,
+               to_copy);
+        copied += to_copy;
+        if (copied == local->vec.iov_len)
+            break;
+    }
+    /*
+     * perform prune with aligned offset
+     * (i.e. at this step we prune a bit
+     * more than it is needed)
+     */
+    STACK_WIND(frame,
+               prune_submit_file_tail,
+               FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->ftruncate,
+               local->fd,
+               conf->aligned_offset,
+               local->xdata);
+    return 0;
+ put_one_call:
+    put_one_call_ftruncate(frame, this);
+    return 0;
+}
+
+/*
+ * Perform a read-prune-write sequence
+ *
+ * If the requested offset is cblock-aligned, ftruncate is wound
+ * straight to the child (callback: prune_complete). Otherwise the
+ * latest atom is read through this translator's own ->readv()
+ * (callback: prune_write).
+ *
+ * Returns 0 when a request has been wound, ENOMEM when the dict can
+ * not be allocated, or the non-zero value returned by dict_set().
+ */
+int32_t read_prune_write(call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_local_io_params_ftruncate(frame, object);
+ get_one_call_nolock(frame);
+
+ /* the mask test assumes the cipher block size is a power of two -- TODO confirm */
+ if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
+ /*
+ * cblock-aligned prune:
+ * we don't need read and write components,
+ * just cut file body
+ */
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune without RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+
+ STACK_WIND(frame,
+ prune_complete,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->orig_offset,
+ local->xdata);
+ return 0;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune with RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+ /*
+ * We are about to perform the "read" component of the
+ * read-prune-write sequence. It means that we need to
+ * read encrypted data from disk and decrypt it.
+ * So, the crypt translator does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_readv()
+
+ */
+ dict = dict_new();
+ if (!dict) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ ret = ENOMEM;
+ goto exit;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ /*
+ * NOTE(review): the raw dict_set() result is returned and the
+ * caller stores it as op_errno -- confirm it is a valid errno
+ */
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ prune_write,
+ this,
+ this->fops->readv, /* crypt_readv */
+ local->fd,
+ get_atom_size(object), /* bytes to read */
+ conf->aligned_offset, /* offset to read from */
+ 0,
+ dict);
+ exit:
+ /* drop our own reference to the dict on both the success and error paths */
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*
+ * Pruning a file is more involved than expanding it: the latest
+ * atom has to be read first, so that the information needed for a
+ * proper update is not lost, and every component of the
+ * read-prune-write sequence has to leave data consistent.
+ *
+ * A prune which is not cblock-aligned is performed as a
+ * read-prune-write sequence:
+ *
+ * 1) read the latest atom;
+ * 2) perform cblock-aligned prune;
+ * 3) issue a write request for the end-of-file.
+ *
+ * Returns the non-zero result of prepare_for_prune() if the
+ * preparation fails, otherwise the result of read_prune_write().
+ */
+int32_t prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+    int32_t err = prepare_for_prune(frame, this, offset);
+
+    return err ? err : read_prune_write(frame, this);
+}
+
+/*
+ * Expand the file up to @offset by submitting a hole which starts
+ * at the old file size. Returns the non-zero result of
+ * prepare_for_submit_hole() on failure, 0 once the hole has been
+ * submitted.
+ */
+int32_t expand_file(call_frame_t *frame, xlator_t *this,
+                    uint64_t offset)
+{
+    crypt_local_t *local = frame->local;
+    int32_t err;
+
+    err = prepare_for_submit_hole(frame, this,
+                                  local->old_file_size,
+                                  offset - local->old_file_size);
+    if (!err)
+        submit_hole(frame, this);
+    return err;
+}
+
+/*
+ * Called as ->fstat_cbk() for a trivial ftruncate (the requested
+ * offset equals the current file size, see do_ftruncate()).
+ *
+ * Stores the result in local, reports the regular file size
+ * (local->cur_file_size) in both prebuf and postbuf, and completes
+ * the ftruncate via put_one_call_ftruncate().
+ */
+static int32_t ftruncate_trivial_completion(call_frame_t *frame,
+                                            void *cookie,
+                                            xlator_t *this,
+                                            int32_t op_ret,
+                                            int32_t op_errno,
+                                            struct iatt *buf,
+                                            dict_t *dict)
+{
+    crypt_local_t *local = frame->local;
+
+    local->op_ret = op_ret;
+    local->op_errno = op_errno;
+    /* a failed fstat may deliver no iatt at all: never dereference NULL */
+    if (buf) {
+        local->prebuf = *buf;
+        local->postbuf = *buf;
+    }
+
+    local->prebuf.ia_size = local->cur_file_size;
+    local->postbuf.ia_size = local->cur_file_size;
+
+    get_one_call(frame);
+    put_one_call_ftruncate(frame, this);
+    return 0;
+}
+
+/*
+ * Called as ->fgetxattr_cbk() (wound by crypt_ftruncate_finodelk_cbk()).
+ *
+ * Extracts the regular file size from @dict and compares it with the
+ * requested offset (local->data_conf.orig_offset):
+ *  - equal:   trivial case, only fstat is wound to the child;
+ *  - smaller: the file is pruned via prune_file();
+ *  - larger:  the file is expanded via expand_file().
+ * On error the ftruncate is finished with op_ret = -1.
+ */
+static int32_t do_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ /* any non-zero fgetxattr result is treated as failure here */
+ if (op_ret)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ if (local->data_conf.orig_offset == local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG,
+ "trivial ftruncate (current file size %llu)",
+ (unsigned long long)local->cur_file_size);
+#endif
+ goto trivial;
+ }
+ else if (local->data_conf.orig_offset < local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = prune_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ else {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = expand_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ if (op_errno)
+ goto error;
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ ftruncate_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ /*
+ * finish with ftruncate
+ */
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ /*
+ * NOTE(review): read_prune_write() already calls
+ * get_one_call_nolock() before it can fail -- confirm the call
+ * counter stays balanced when prune_file() returns an error
+ */
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Called as ->finodelk_cbk() of the lock request issued by
+ * crypt_ftruncate(). When the lock has been granted, the file size
+ * xattr is requested from the child (callback: do_ftruncate());
+ * otherwise the ftruncate is finished with the lock error.
+ */
+static int32_t crypt_ftruncate_finodelk_cbk(call_frame_t *frame,
+                                            void *cookie,
+                                            xlator_t *this,
+                                            int32_t op_ret,
+                                            int32_t op_errno,
+                                            dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+
+    local->op_ret = op_ret;
+    local->op_errno = op_errno;
+
+    if (op_ret >= 0) {
+        /* access granted: retrieve the file size first */
+        STACK_WIND(frame,
+                   do_ftruncate,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->fgetxattr,
+                   local->fd,
+                   FSIZE_XATTR_PREFIX,
+                   NULL);
+        return 0;
+    }
+    /* lock failed */
+    get_one_call_nolock(frame);
+    put_one_call_ftruncate(frame, this);
+    return 0;
+}
+
+/*
+ * ftruncate is performed in 2 steps:
+ * . receive file size;
+ * . expand or prune file.
+ *
+ * This entry point sets up the local context and requests a write
+ * lock (F_WRLCK, F_SETLKW) from the child; the sequence continues in
+ * crypt_ftruncate_finodelk_cbk(). On a setup failure ftruncate is
+ * unwound with op_ret = -1 and NULL iatts.
+ */
+static int32_t crypt_ftruncate(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ /* the requested new size; compared with the file size in do_ftruncate() */
+ local->data_conf.orig_offset = offset;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_ftruncate_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ /* release whatever has been acquired so far */
+ if (local && local->fd)
+ fd_unref(fd);
+ /* local->xdata is assigned only after the last "goto error" above */
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * ->flush_cbk(): the last step of crypt_truncate().
+ * Unwinds truncate with the result of the flush and with the
+ * iatts and xdata kept in the local context.
+ */
+int32_t truncate_end(call_frame_t *frame,
+                     void *cookie,
+                     xlator_t *this,
+                     int32_t op_ret,
+                     int32_t op_errno,
+                     dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+    struct iatt *pre = &local->prebuf;
+    struct iatt *post = &local->postbuf;
+    dict_t *reply_xdata = local->xdata;
+
+    STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno,
+                        pre, post, reply_xdata);
+    return 0;
+}
+
+/*
+ * ftruncate_cbk() of the ftruncate wound by truncate_begin().
+ *
+ * On success the resulting iatts are saved in the local context and
+ * the fd is flushed on the child; truncate_end() then unwinds.
+ * On failure truncate is unwound right away with the ftruncate
+ * error. In both cases the fd reference held in local->fd is dropped.
+ */
+int32_t truncate_flush(call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno,
+                       struct iatt *prebuf,
+                       struct iatt *postbuf,
+                       dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+    fd_t *fd = local->fd;
+
+    if (op_ret < 0) {
+        /*
+         * crypt_ftruncate() unwinds failures with NULL iatts, so
+         * they must not be dereferenced. Report the ftruncate
+         * error itself instead of letting the result of a
+         * subsequent flush replace it.
+         */
+        fd_unref(fd);
+        STACK_UNWIND_STRICT(truncate,
+                            frame,
+                            op_ret,
+                            op_errno, NULL, NULL, NULL);
+        return 0;
+    }
+    if (prebuf)
+        local->prebuf = *prebuf;
+    if (postbuf)
+        local->postbuf = *postbuf;
+
+    STACK_WIND(frame,
+               truncate_end,
+               FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->flush,
+               fd,
+               NULL);
+    fd_unref(fd);
+    return 0;
+}
+
+/*
+ * is called as ->open_cbk()
+ *
+ * If the open failed, the fd created by crypt_truncate() is released
+ * and truncate is unwound with the open error. Otherwise ftruncate
+ * is wound to this translator itself (callback: truncate_flush()).
+ */
+static int32_t truncate_begin(call_frame_t *frame,
+                              void *cookie,
+                              xlator_t *this,
+                              int32_t op_ret,
+                              int32_t op_errno,
+                              fd_t *fd,
+                              dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+    fd_t *local_fd = local->fd;
+
+    if (op_ret < 0) {
+        /*
+         * The callback @fd can be NULL here (crypt_open() unwinds
+         * its own failures with a NULL fd), so release the
+         * reference through the pointer saved by crypt_truncate().
+         */
+        fd_unref(local_fd);
+        STACK_UNWIND_STRICT(truncate,
+                            frame,
+                            op_ret,
+                            op_errno, NULL, NULL, NULL);
+        return 0;
+    }
+    /*
+     * crypt_truncate() is implemented via crypt_ftruncate(),
+     * so the crypt xlator does STACK_WIND to itself here
+     */
+    STACK_WIND(frame,
+               truncate_flush,
+               this,
+               this->fops->ftruncate, /* crypt_ftruncate */
+               local_fd,
+               local->offset,
+               NULL);
+    return 0;
+}
+
+/*
+ * crypt_truncate() is implemented via crypt_ftruncate() as a
+ * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
+ *
+ * A new fd is created for @loc->inode and opened O_RDWR through this
+ * translator's own ->open(); truncate_begin() continues the sequence.
+ * Any setup failure is unwound with EINVAL.
+ */
+int32_t crypt_truncate(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset,
+ dict_t *xdata)
+{
+ fd_t *fd;
+ crypt_local_t *local;
+
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "truncate file %s at offset %llu",
+ loc->path, (unsigned long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
+ if (!local)
+ goto error;
+
+ fd = fd_create(loc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ /* the fd is released later by truncate_begin() or truncate_flush() */
+ local->fd = fd;
+ local->offset = offset;
+ /* stored without taking a reference; passed back by truncate_end() */
+ local->xdata = xdata;
+ STACK_WIND(frame,
+ truncate_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ loc,
+ O_RDWR,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * Pick the end-of-writeback handler for @fop.
+ * Only GF_FOP_WRITE and GF_FOP_FTRUNCATE are supported; for any
+ * other operation a warning is logged and NULL is returned.
+ */
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop)
+{
+    if (fop == GF_FOP_WRITE)
+        return end_writeback_writev;
+    if (fop == GF_FOP_FTRUNCATE)
+        return end_writeback_ftruncate;
+
+    gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
+    return NULL;
+}
+
+/*
+ * true, if the caller needs metadata string
+ *
+ * Returns 0 when @xdata is NULL or carries no MSGFLAGS_PREFIX key,
+ * -1 when the stored value does not have the size of a uint32_t,
+ * otherwise the result of msgflags_check_mtd_lock() on the flags.
+ */
+static int32_t is_custom_mtd(dict_t *xdata)
+{
+    data_t *data;
+    uint32_t flags;
+
+    if (!xdata)
+        return 0;
+
+    data = dict_get(xdata, MSGFLAGS_PREFIX);
+    if (!data)
+        return 0;
+    if (data->len != sizeof(uint32_t)) {
+        gf_log("crypt", GF_LOG_WARNING,
+               "Bad msgflags size (%d)", data->len);
+        return -1;
+    }
+    /*
+     * Copy the bytes out instead of casting the buffer to
+     * uint32_t *: the buffer carries no alignment guarantee for
+     * a 32-bit load.
+     */
+    memcpy(&flags, data->data, sizeof(flags));
+    return msgflags_check_mtd_lock(&flags);
+}
+
+/*
+ * Called as ->finodelk_cbk() of the unlock request issued by
+ * crypt_open_tail(); finishes the open via put_one_call_open().
+ *
+ * The result of the unlock is recorded only if no earlier step has
+ * already failed: load_mtd_open() stores its own error in local and
+ * then still unlocks through crypt_open_tail(), so a successful
+ * unlock must not turn that failure into a successful open.
+ */
+static int32_t crypt_open_done(call_frame_t *frame,
+                               void *cookie,
+                               xlator_t *this,
+                               int32_t op_ret,
+                               int32_t op_errno, dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+
+    if (op_ret < 0)
+        gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+               op_errno);
+    if (local->op_ret >= 0) {
+        /* nothing has failed before: report the unlock result */
+        local->op_ret = op_ret;
+        local->op_errno = op_errno;
+    }
+    put_one_call_open(frame);
+    return 0;
+}
+
+/*
+ * Last step of open: send an unlock request (F_UNLCK, start 0,
+ * length 0) for local->fd to the child. crypt_open_done() is the
+ * callback.
+ */
+static void crypt_open_tail(call_frame_t *frame, xlator_t *this)
+{
+    crypt_local_t *local = frame->local;
+    struct gf_flock unlock = {0, };
+
+    unlock.l_pid = 0;
+    unlock.l_len = 0;
+    unlock.l_start = 0;
+    unlock.l_whence = SEEK_SET;
+    unlock.l_type = F_UNLCK;
+
+    STACK_WIND(frame,
+               crypt_open_done,
+               FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->finodelk,
+               this->name,
+               local->fd,
+               F_SETLKW,
+               &unlock,
+               NULL);
+}
+
+/*
+ * load private inode info at open time
+ * called as ->fgetxattr_cbk()
+ *
+ * Steps: look up cached inode info (or allocate a new one), extract
+ * the format string from @dict, authenticate it with open_format()
+ * and, for newly allocated info, store it in the inode context.
+ * If the caller asked for the metadata string (local->custom_mtd),
+ * it is added to local->xdata.
+ * Errors are recorded in local->op_ret / local->op_errno.
+ */
+static int load_mtd_open(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret;
+ gf_boolean_t upload_info;
+ data_t *mtd;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ /* symlinks are not processed any further */
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ /*
+ * first, check for cached info
+ */
+ ret = inode_ctx_get(local->fd->inode, this, &value);
+ if (ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Inode info expected, but not found");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * info has been found in the cache
+ */
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info hasn't been found in the cache.
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto exit;
+ }
+ init_inode_info_head(info, local->fd);
+ upload_info = _gf_true;
+ }
+ /*
+ * extract metadata
+ */
+ mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
+ if (!mtd) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Format string wasn't found");
+ goto exit;
+ }
+ /*
+ * authenticate metadata against the path
+ */
+ ret = open_format((unsigned char *)mtd->data,
+ mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ if (upload_info) {
+ /* newly allocated info: finish its setup and cache it in the inode */
+ ret = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ ret = inode_ctx_put(local->fd->inode,
+ this, (uint64_t)(long)info);
+ if (ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ }
+ if (local->custom_mtd) {
+ /*
+ * pass the metadata string to the customer
+ */
+ ret = dict_set_static_bin(local->xdata,
+ CRYPTO_FORMAT_PREFIX,
+ mtd->data,
+ mtd->len);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ }
+ exit:
+ /*
+ * Without custom_mtd the lock is released via crypt_open_tail();
+ * with custom_mtd no unlock is sent from here.
+ */
+ if (!local->custom_mtd)
+ crypt_open_tail(frame, this);
+ else
+ put_one_call_open(frame);
+ return 0;
+}
+
+/*
+ * Called as ->finodelk_cbk() of the lock request issued by
+ * crypt_open_cbk(). Once the lock is held, the format string is
+ * requested from the child (callback: load_mtd_open()); if locking
+ * failed, the open is finished with the lock error.
+ */
+static int32_t crypt_open_finodelk_cbk(call_frame_t *frame,
+                                       void *cookie,
+                                       xlator_t *this,
+                                       int32_t op_ret,
+                                       int32_t op_errno,
+                                       dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+
+    local->op_ret = op_ret;
+    local->op_errno = op_errno;
+
+    if (op_ret >= 0) {
+        STACK_WIND(frame,
+                   load_mtd_open,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->fgetxattr,
+                   local->fd,
+                   CRYPTO_FORMAT_PREFIX,
+                   NULL);
+        return 0;
+    }
+    gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
+    put_one_call_open(frame);
+    return 0;
+}
+
+/*
+ * verify metadata against the specified pathname
+ *
+ * Called as ->open_cbk() of the open wound by crypt_open().
+ * For a successfully opened non-symlink a lock is requested from the
+ * child (write lock when the caller asked for the metadata string,
+ * read lock otherwise); crypt_open_finodelk_cbk() continues.
+ */
+static int32_t crypt_open_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ /* symlinks are not processed any further */
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ else if (local->custom_mtd){
+ /* a dict is needed to carry the metadata string back (see load_mtd_open()) */
+ local->xdata = dict_new();
+ if (!local->xdata) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_ERROR,
+ "Can not get new dict for mtd string");
+ goto exit;
+ }
+ }
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_open_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+/*
+ * ->open() entry point.
+ *
+ * Saves a copy of @loc and a reference to @fd in the local context,
+ * detects whether the caller asked for the metadata string, adjusts
+ * the open flags (O_WRONLY becomes O_RDWR, O_APPEND is dropped) and
+ * winds open to the child; crypt_open_cbk() continues.
+ *
+ * On a setup failure everything acquired here is released again and
+ * open is unwound with op_ret = -1 and a NULL fd.
+ */
+static int32_t crypt_open(call_frame_t *frame,
+                          xlator_t *this,
+                          loc_t *loc,
+                          int32_t flags,
+                          fd_t *fd,
+                          dict_t *xdata)
+{
+    int32_t ret = ENOMEM;
+    crypt_local_t *local;
+
+    local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
+    if (!local)
+        goto error;
+    /* GF_CALLOC() hands out zeroed memory, no extra memset() is needed */
+    local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+    if (!local->loc) {
+        ret = ENOMEM;
+        goto error;
+    }
+    if (loc_copy(local->loc, loc)) {
+        GF_FREE(local->loc);
+        /* do not leave a dangling pointer in the local context */
+        local->loc = NULL;
+        /* report a real errno rather than loc_copy()'s raw result */
+        ret = ENOMEM;
+        goto error;
+    }
+    local->fd = fd_ref(fd);
+
+    ret = is_custom_mtd(xdata);
+    if (ret < 0) {
+        loc_wipe(local->loc);
+        GF_FREE(local->loc);
+        local->loc = NULL;
+        /* drop the fd reference taken above, it was leaked before */
+        fd_unref(local->fd);
+        local->fd = NULL;
+        ret = EINVAL;
+        goto error;
+    }
+    local->custom_mtd = ret;
+
+    if ((flags & O_ACCMODE) == O_WRONLY)
+        /*
+         * we can't open O_WRONLY, because
+         * we need to do read-modify-write
+         */
+        flags = (flags & ~O_ACCMODE) | O_RDWR;
+    /*
+     * Make sure that our translated offsets
+     * and counts won't be ignored
+     */
+    flags &= ~O_APPEND;
+    get_one_call_nolock(frame);
+    STACK_WIND(frame,
+               crypt_open_cbk,
+               FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->open,
+               loc,
+               flags,
+               fd,
+               xdata);
+    return 0;
+ error:
+    STACK_UNWIND_STRICT(open,
+                        frame,
+                        -1,
+                        ret,
+                        NULL,
+                        NULL);
+    return 0;
+}
+
+/*
+ * Finish the setup of @info: invoke the set_private() method of the
+ * data cipher selected by the object's algorithm and mode.
+ * Returns 0 on success, or the non-zero result of set_private().
+ */
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+                                    struct master_cipher_info *master)
+{
+    struct object_cipher_info *object = &info->cinfo;
+    int32_t err;
+
+#if DEBUG_CRYPT
+    gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
+           uuid_utoa(info->oid));
+#endif
+    err = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
+                                                                      master);
+    if (!err)
+        return 0;
+
+    gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
+    return err;
+}
+
+/*
+ * Fill @info at ->create() time: the object id is copied from @data
+ * and the cipher parameters are inherited from @master.
+ */
+static void init_inode_info_create(struct crypt_inode_info *info,
+                                   struct master_cipher_info *master,
+                                   data_t *data)
+{
+    struct object_cipher_info *cinfo = &info->cinfo;
+
+    info->nr_minor = CRYPT_XLATOR_ID;
+    memcpy(info->oid, data->data, data->len);
+
+    /* per-object cipher parameters come from the master info */
+    cinfo->o_alg = master->m_alg;
+    cinfo->o_mode = master->m_mode;
+    cinfo->o_block_bits = master->m_block_bits;
+    cinfo->o_dkey_size = master->m_dkey_size;
+}
+
+/*
+ * Set the object id of @info to the gfid of the inode behind @fd.
+ */
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
+{
+    inode_t *inode = fd->inode;
+
+    memcpy(info->oid, inode->gfid, sizeof(uuid_t));
+}
+
+/*
+ * Called as ->finodelk_cbk() of the unlock request issued by
+ * crypt_create_tail(): the last step of create.
+ *
+ * Finishes the setup of the inode info and stores it in the inode
+ * context; if any of that fails (or the unlock itself failed) the
+ * info is freed. Create is then unwound and the references kept in
+ * the local context are dropped.
+ */
+static int32_t crypt_create_done(call_frame_t *frame,
+                                 void *cookie,
+                                 xlator_t *this,
+                                 int32_t op_ret,
+                                 int32_t op_errno, dict_t *xdata)
+{
+    crypt_private_t *priv = this->private;
+    crypt_local_t *local = frame->local;
+    struct crypt_inode_info *info = local->info;
+    /* saved up front: they are needed after the unwind */
+    fd_t *saved_fd = local->fd;
+    dict_t *saved_xdata = local->xdata;
+    inode_t *saved_inode = local->inode;
+    int drop_info = 1;
+
+    if (op_ret >= 0) {
+        op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
+        if (op_errno) {
+            op_ret = -1;
+        } else {
+            /*
+             * FIXME: drop major subversion number
+             */
+            op_ret = inode_ctx_put(local->fd->inode, this,
+                                   (uint64_t)(long)info);
+            if (op_ret == -1)
+                op_errno = EIO;
+            else
+                drop_info = 0;
+        }
+    }
+    if (drop_info)
+        free_inode_info(info);
+
+    free_format(local);
+    STACK_UNWIND_STRICT(create,
+                        frame,
+                        op_ret,
+                        op_errno,
+                        saved_fd,
+                        saved_inode,
+                        &local->buf,
+                        &local->prebuf,
+                        &local->postbuf,
+                        saved_xdata);
+    fd_unref(saved_fd);
+    inode_unref(saved_inode);
+    if (saved_xdata)
+        dict_unref(saved_xdata);
+    return 0;
+}
+
+/*
+ * Called as ->fsetxattr_cbk() of the request wound by
+ * crypt_create_finodelk_cbk().
+ *
+ * Drops the xattr dict. On success an unlock request is sent to the
+ * child (callback: crypt_create_done()); on failure the inode info
+ * and format are freed and create is unwound with the error.
+ */
+static int crypt_create_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ /* saved up front: they are used after the unwind on the error path */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ dict_unref(local->xattr);
+
+ if (op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_create_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(local->info);
+ free_format(local);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+/*
+ * Called as ->finodelk_cbk() of the lock request issued by
+ * crypt_create_cbk().
+ *
+ * With the lock held, the prepared xattr dict is stored on the child
+ * via fsetxattr (callback: crypt_create_tail()). If locking failed,
+ * every resource kept in the local context is released and create
+ * is unwound with the lock error.
+ */
+static int32_t crypt_create_finodelk_cbk(call_frame_t *frame,
+                                         void *cookie,
+                                         xlator_t *this,
+                                         int32_t op_ret,
+                                         int32_t op_errno,
+                                         dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+    struct crypt_inode_info *info = local->info;
+
+    if (op_ret < 0)
+        goto error;
+
+    STACK_WIND(frame,
+               crypt_create_tail,
+               FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetxattr,
+               local->fd,
+               local->xattr, /* CRYPTO_FORMAT_PREFIX */
+               0,
+               NULL);
+    return 0;
+ error:
+    free_inode_info(info);
+    free_format(local);
+    fd_unref(local->fd);
+    dict_unref(local->xattr);
+    if (local->xdata)
+        dict_unref(local->xdata);
+    /*
+     * crypt_create_cbk() took a reference on the inode before
+     * requesting the lock: drop it here as well, like the other
+     * create completion paths do.
+     */
+    if (local->inode)
+        inode_unref(local->inode);
+
+    STACK_UNWIND_STRICT(create,
+                        frame,
+                        op_ret,
+                        op_errno,
+                        NULL,
+                        NULL,
+                        NULL,
+                        NULL,
+                        NULL,
+                        NULL);
+    return 0;
+}
+
+/*
+ * Create and store crypt-specific format on disk;
+ * Populate cache with private inode info
+ *
+ * Called as ->create_cbk() of the create wound by crypt_create().
+ * On success the results are saved in the local context and a write
+ * lock is requested (callback: crypt_create_finodelk_cbk()).
+ * On failure the resources prepared by crypt_create() are released
+ * and create is unwound with the error.
+ */
+static int32_t crypt_create_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+ /* keep references and copies for the final unwind */
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_create_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * ->create() entry point.
+ *
+ * Prepares everything that is needed before talking to the server:
+ * the private inode info, the on-disk format string and the xattr
+ * dict (format string plus a zero file size), then winds create to
+ * the child; crypt_create_cbk() continues.
+ *
+ * On any setup failure the resources allocated so far are released
+ * and create is unwound with op_ret = -1 and the error in @ret.
+ */
+static int32_t crypt_create(call_frame_t *frame,
+                            xlator_t *this,
+                            loc_t *loc,
+                            int32_t flags,
+                            mode_t mode,
+                            mode_t umask,
+                            fd_t *fd,
+                            dict_t *xdata)
+{
+    int ret;
+    data_t *data;
+    crypt_local_t *local;
+    crypt_private_t *priv;
+    struct master_cipher_info *master;
+    struct crypt_inode_info *info;
+
+    priv = this->private;
+    master = get_master_cinfo(priv);
+
+    if (master_alg_atomic(master)) {
+        /*
+         * We can't open O_WRONLY, because we
+         * need to do read-modify-write.
+         */
+        if ((flags & O_ACCMODE) == O_WRONLY)
+            flags = (flags & ~O_ACCMODE) | O_RDWR;
+        /*
+         * Make sure that our translated offsets
+         * and counts won't be ignored
+         */
+        flags &= ~O_APPEND;
+    }
+    local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
+    if (!local) {
+        ret = ENOMEM;
+        goto error;
+    }
+    data = dict_get(xdata, "gfid-req");
+    if (!data) {
+        ret = EINVAL;
+        gf_log("crypt", GF_LOG_WARNING, "gfid not found");
+        goto error;
+    }
+    if (data->len != sizeof(uuid_t)) {
+        ret = EINVAL;
+        gf_log("crypt", GF_LOG_WARNING,
+               "bad gfid size (%d), should be %d",
+               (int)data->len, (int)sizeof(uuid_t));
+        goto error;
+    }
+    info = alloc_inode_info(local, loc);
+    if (!info){
+        ret = ENOMEM;
+        goto error;
+    }
+    /*
+     * NOTE:
+     * format has to be created BEFORE
+     * proceeding to the untrusted server
+     */
+    ret = alloc_format_create(local);
+    if (ret) {
+        free_inode_info(info);
+        goto error;
+    }
+    init_inode_info_create(info, master, data);
+
+    ret = create_format(local->format,
+                        loc,
+                        info,
+                        master);
+    if (ret) {
+        free_inode_info(info);
+        /* the format buffer is already allocated at this point */
+        free_format(local);
+        goto error;
+    }
+    local->xattr = dict_new();
+    if (!local->xattr) {
+        /* ret is still 0 here: set a real error before unwinding */
+        ret = ENOMEM;
+        free_inode_info(info);
+        free_format(local);
+        goto error;
+    }
+    ret = dict_set_static_bin(local->xattr,
+                              CRYPTO_FORMAT_PREFIX,
+                              local->format,
+                              new_format_size());
+    if (ret) {
+        dict_unref(local->xattr);
+        free_inode_info(info);
+        free_format(local);
+        goto error;
+    }
+    /* a new file starts with a regular file size of zero */
+    ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+    if (ret) {
+        dict_unref(local->xattr);
+        free_inode_info(info);
+        free_format(local);
+        goto error;
+    }
+    local->fd = fd_ref(fd);
+
+    STACK_WIND(frame,
+               crypt_create_cbk,
+               FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->create,
+               loc,
+               flags,
+               mode,
+               umask,
+               fd,
+               xdata);
+    return 0;
+ error:
+    gf_log("crypt", GF_LOG_WARNING, "can not create file");
+    STACK_UNWIND_STRICT(create,
+                        frame,
+                        -1,
+                        ret,
+                        NULL, NULL, NULL,
+                        NULL, NULL, NULL);
+    return 0;
+}
+
+/*
+ * dict_foreach_fnmatch() callback: delete the matched @key from
+ * @dict. Always returns 0.
+ *
+ * FIXME: this should depend on the version of format string
+ */
+static int32_t filter_crypt_xattr(dict_t *dict,
+                                  char *key, data_t *value, void *data)
+{
+    /* only the key is of interest here */
+    (void)value;
+    (void)data;
+
+    dict_del(dict, key);
+    return 0;
+}
+
+/*
+ * ->fsetxattr(): remove every key matching "trusted.glusterfs.crypt*"
+ * from @dict, then pass the request on to the child.
+ */
+static int32_t crypt_fsetxattr(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ /* the caller's dict is modified in place */
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * ->setxattr(): remove every key matching "trusted.glusterfs.crypt*"
+ * from @dict, then pass the request on to the child.
+ *
+ * TBD: verify file metadata before wind
+ */
+static int32_t crypt_setxattr(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ /* the caller's dict is modified in place */
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * called as flush_cbk()
+ *
+ * Records the flush result and unwinds the link operation with the
+ * handler that matches local->fop. An ENOENT failure on a symlink
+ * is reported as success.
+ */
+static int32_t linkop_end(call_frame_t *frame,
+                          void *cookie,
+                          xlator_t *this,
+                          int32_t op_ret,
+                          int32_t op_errno,
+                          dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+    linkop_unwind_handler_t unwind_fn = linkop_unwind_dispatch(local->fop);
+    int forgive;
+
+    forgive = (op_ret < 0 &&
+               op_errno == ENOENT &&
+               local->loc->inode->ia_type == IA_IFLNK);
+
+    local->op_ret = forgive ? 0 : op_ret;
+    local->op_errno = forgive ? 0 : op_errno;
+
+    unwind_fn(frame);
+    return 0;
+}
+
+/*
+ * unpin inode on the server
+ *
+ * Called as ->link_cbk() of the link wound by link_wind().
+ * On success the results are saved in the local context and
+ * local->fd is flushed on the child (callback: linkop_end()).
+ * On failure link is unwound with op_ret = -1.
+ */
+static int32_t link_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /* replace the request xdata with the one returned by the child */
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ link_unwind(frame);
+ return 0;
+}
+
+/*
+ * Unwind link and release the resources kept in the local context.
+ * Without a local context link is unwound with ENOMEM.
+ */
+void link_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ inode_t *inode;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(link,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ /* saved up front: they are released after the unwind */
+ xdata = local->xdata;
+ xattr = local->xattr;
+ inode = local->inode;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc) {
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(link,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (inode)
+ inode_unref(inode);
+}
+
+/*
+ * Wind link to the child with the locations and xdata kept in the
+ * local context; link_flush() is the callback.
+ */
+void link_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ link_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+/*
+ * unlink()
+ *
+ * Called as ->unlink_cbk() of the unlink wound by unlink_wind().
+ * On success the parent iatts and xdata are saved in the local
+ * context and local->fd is flushed on the child (callback:
+ * linkop_end()). On failure unlink is unwound with op_ret = -1.
+ */
+static int32_t unlink_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+ /* replace the request xdata with the one returned by the child */
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unlink_unwind(frame);
+ return 0;
+}
+
+/*
+ * Unwind unlink and release the resources kept in the local context.
+ * Without a local context unlink is unwound with ENOMEM.
+ */
+void unlink_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ /* saved up front: they are released after the unwind */
+ xdata = local->xdata;
+ xattr = local->xattr;
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+}
+
+/*
+ * Wind unlink to the child with the location, flags and xdata kept
+ * in the local context; unlink_flush() is the callback.
+ */
+void unlink_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ unlink_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ local->loc,
+ local->flags,
+ local->xdata);
+}
+
+/*
+ * Unwind rename and release the resources kept in the local context,
+ * including the heap copies of the new-parent iatts.
+ * Without a local context rename is unwound with ENOMEM.
+ */
+void rename_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ /* saved up front: they are released after the unwind */
+ xdata = local->xdata;
+ xattr = local->xattr;
+ prenewparent = local->prenewparent;
+ postnewparent = local->postnewparent;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc){
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ prenewparent,
+ postnewparent,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (prenewparent)
+ GF_FREE(prenewparent);
+ if (postnewparent)
+ GF_FREE(postnewparent);
+}
+
+/*
+ * called as flush_cbk()
+ *
+ * Last step of rename: record the result of the flush in the local
+ * context and unwind via rename_unwind().
+ */
+static int32_t rename_end(call_frame_t *frame,
+                          void *cookie,
+                          xlator_t *this,
+                          int32_t op_ret,
+                          int32_t op_errno,
+                          dict_t *xdata)
+{
+    crypt_local_t *ctx = frame->local;
+
+    ctx->op_errno = op_errno;
+    ctx->op_ret = op_ret;
+
+    rename_unwind(frame);
+    return 0;
+}
+
+/*
+ * Called as ->rename_cbk() of the rename wound by rename_wind().
+ *
+ * On success the results are saved in the local context (the
+ * new-parent iatts as heap copies, released by rename_unwind()) and
+ * local->fd is flushed on the child (callback: rename_end()).
+ * On failure rename is unwound with op_ret = -1.
+ */
+static int32_t rename_flush(call_frame_t *frame,
+                            void *cookie,
+                            xlator_t *this,
+                            int32_t op_ret,
+                            int32_t op_errno,
+                            struct iatt *buf,
+                            struct iatt *preoldparent,
+                            struct iatt *postoldparent,
+                            struct iatt *prenewparent,
+                            struct iatt *postnewparent,
+                            dict_t *xdata)
+{
+    crypt_local_t *local = frame->local;
+
+    if (op_ret < 0)
+        goto error;
+    /*
+     * Replace the request xdata with the one returned by the child.
+     * The old dict is released only if there is one, as link_flush()
+     * and unlink_flush() do.
+     */
+    if (local->xdata) {
+        dict_unref(local->xdata);
+        local->xdata = NULL;
+    }
+    if (xdata)
+        local->xdata = dict_ref(xdata);
+
+    local->buf = *buf;
+    local->prebuf = *preoldparent;
+    local->postbuf = *postoldparent;
+    if (prenewparent) {
+        local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
+                                        gf_crypt_mt_iatt);
+        if (!local->prenewparent) {
+            op_errno = ENOMEM;
+            goto error;
+        }
+        *local->prenewparent = *prenewparent;
+    }
+    if (postnewparent) {
+        local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
+                                         gf_crypt_mt_iatt);
+        if (!local->postnewparent) {
+            op_errno = ENOMEM;
+            goto error;
+        }
+        *local->postnewparent = *postnewparent;
+    }
+    STACK_WIND(frame,
+               rename_end,
+               FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->flush,
+               local->fd,
+               NULL);
+    return 0;
+ error:
+    local->op_ret = -1;
+    local->op_errno = op_errno;
+    rename_unwind(frame);
+    return 0;
+}
+
+/*
+ * Wind the rename to the first child using the old/new locations and
+ * xdata saved in the frame's crypt_local_t; rename_flush() is installed
+ * as the callback.
+ */
+void rename_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ rename_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+/*
+ * Callback of the finodelk(F_UNLCK) wound by do_linkop().
+ *
+ * The result of the unlock is stored in local. If it succeeded, the
+ * actual link operation is wound through the handler selected by
+ * local->fop; otherwise the failure is logged and the operation is
+ * unwound through the matching unwind handler.
+ *
+ * Always returns 0.
+ */
+static int32_t __do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_wind_handler_t wind_fn;
+ linkop_unwind_handler_t unwind_fn;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret >= 0)
+ wind_fn(frame, this);
+ else {
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ unwind_fn(frame);
+ }
+ return 0;
+}
+
+/*
+ * Callback of the setxattr wound by linkop_begin().
+ *
+ * If the setxattr succeeded, release the lock on local->fd by winding
+ * finodelk(F_SETLKW, F_UNLCK) in the domain this->name; __do_linkop()
+ * continues from there. If it failed, unwind through the handler
+ * selected by local->fop.
+ *
+ * Always returns 0.
+ */
+static int32_t do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if(op_ret < 0)
+ goto error;
+
+ /* l_start = 0 and l_len = 0: the unlock request covers the whole file */
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ /* NOTE(review): the lock is not released on this path - confirm intended */
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * Update the metadata string (against the new pathname);
+ * submit the result
+ *
+ * Callback of the open wound by linkop(). @xdata is expected to carry
+ * the old metadata string under CRYPTO_FORMAT_PREFIX.
+ *
+ * Steps:
+ *  - symlinks skip the metadata update, the link operation is wound
+ *    directly;
+ *  - allocate room for the new metadata string;
+ *  - find the per-inode info in the inode context, or build and cache
+ *    a new one;
+ *  - open (verify) the old metadata string, build the new one and
+ *    store it on the server with setxattr; do_linkop() continues.
+ *
+ * On any failure the operation is unwound with op_ret = -1 and a
+ * positive op_errno. Always returns 0.
+ */
+static int32_t linkop_begin(call_frame_t *frame,
+                            void *cookie,
+                            xlator_t *this,
+                            int32_t op_ret,
+                            int32_t op_errno,
+                            fd_t *fd,
+                            dict_t *xdata)
+{
+        gf_boolean_t upload_info;
+        crypt_local_t *local = frame->local;
+        crypt_private_t *priv = this->private;
+        struct crypt_inode_info *info;
+        data_t *old_mtd;
+        uint32_t new_mtd_size;
+        uint64_t value = 0;
+        void (*unwind_fn)(call_frame_t *frame);
+        void (*wind_fn)(call_frame_t *frame, xlator_t *this);
+        mtd_op_t mop;
+
+        wind_fn = linkop_wind_dispatch(local->fop);
+        unwind_fn = linkop_unwind_dispatch(local->fop);
+        mop = linkop_mtdop_dispatch(local->fop);
+
+        if (local->fd->inode->ia_type == IA_IFLNK)
+                goto wind;
+        if (op_ret < 0)
+                /*
+                 * verification failed
+                 */
+                goto error;
+
+        old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
+        if (!old_mtd) {
+                op_errno = EIO;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Metadata string wasn't found");
+                goto error;
+        }
+        new_mtd_size = format_size(mop, old_mtd->len);
+        op_errno = alloc_format(local, new_mtd_size);
+        if (op_errno)
+                goto error;
+        /*
+         * check for cached info
+         */
+        op_ret = inode_ctx_get(fd->inode, this, &value);
+        if (op_ret != -1) {
+                info = (struct crypt_inode_info *)(long)value;
+                if (info == NULL) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "Inode info was not found");
+                        op_errno = EINVAL;
+                        goto error;
+                }
+                /*
+                 * info was found in the cache
+                 */
+                local->info = info;
+                upload_info = _gf_false;
+        }
+        else {
+                /*
+                 * info wasn't found in the cache;
+                 */
+                info = alloc_inode_info(local, local->loc);
+                if (!info) {
+                        /*
+                         * op_errno is 0 at this point (alloc_format()
+                         * succeeded); report a real error instead of
+                         * unwinding with -1/0
+                         */
+                        op_errno = ENOMEM;
+                        goto error;
+                }
+                init_inode_info_head(info, fd);
+                local->info = info;
+                upload_info = _gf_true;
+        }
+        op_errno = open_format((unsigned char *)old_mtd->data,
+                               old_mtd->len,
+                               local->loc,
+                               info,
+                               get_master_cinfo(priv),
+                               local,
+                               upload_info);
+        if (op_errno)
+                goto error;
+        if (upload_info == _gf_true) {
+                op_errno = init_inode_info_tail(info,
+                                                get_master_cinfo(priv));
+                if (op_errno)
+                        goto error;
+                op_errno = inode_ctx_put(fd->inode, this,
+                                         (uint64_t)(long)(info));
+                if (op_errno == -1) {
+                        op_errno = EIO;
+                        goto error;
+                }
+        }
+        /*
+         * update the format string (append/update/cut a MAC)
+         */
+        op_errno = update_format(local->format,
+                                 (unsigned char *)old_mtd->data,
+                                 old_mtd->len,
+                                 local->mac_idx,
+                                 mop,
+                                 local->newloc,
+                                 info,
+                                 get_master_cinfo(priv),
+                                 local);
+        if (op_errno)
+                goto error;
+        /*
+         * store the new format string on the server
+         */
+        if (new_mtd_size) {
+                if (dict_set_static_bin(local->xattr,
+                                        CRYPTO_FORMAT_PREFIX,
+                                        local->format,
+                                        new_mtd_size)) {
+                        /*
+                         * the dict API's failure value is not
+                         * guaranteed to be a valid (positive) errno
+                         */
+                        op_errno = ENOMEM;
+                        goto error;
+                }
+        }
+        STACK_WIND(frame,
+                   do_linkop,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->setxattr,
+                   local->loc,
+                   local->xattr,
+                   0,
+                   NULL);
+        return 0;
+ wind:
+        wind_fn(frame, this);
+        return 0;
+ error:
+        local->op_ret = -1;
+        local->op_errno = op_errno;
+        unwind_fn(frame);
+        return 0;
+}
+
+/*
+ * Allocate the frame's crypt_local_t for a link operation (@op) and
+ * fill it: a reference on @xdata (if any), a new fd for oldloc->inode,
+ * @flags, private copies of @oldloc and (if not NULL) @newloc, and an
+ * empty dict in local->xattr.
+ *
+ * Returns 0 on success. On failure returns a positive errno; every
+ * resource taken here is released again and the corresponding fields
+ * of local are reset to NULL, so that a later unwind handler neither
+ * releases them a second time nor passes stale pointers to the caller.
+ * If local itself could be allocated, local->op_ret/op_errno are set.
+ */
+static int32_t linkop_grab_local(call_frame_t *frame,
+                                 xlator_t *this,
+                                 loc_t *oldloc,
+                                 loc_t *newloc,
+                                 int flags, dict_t *xdata,
+                                 glusterfs_fop_t op)
+{
+        int32_t ret = ENOMEM;
+        fd_t *fd;
+        crypt_local_t *local;
+
+        local = crypt_alloc_local(frame, this, op);
+        if (!local)
+                /* nothing has been acquired yet and there is no local to fill */
+                return ENOMEM;
+        if (xdata)
+                local->xdata = dict_ref(xdata);
+
+        fd = fd_create(oldloc->inode, frame->root->pid);
+        if (!fd) {
+                gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+                goto error;
+        }
+        local->fd = fd;
+        local->flags = flags;
+        /* GF_CALLOC returns zeroed memory, no extra memset is needed */
+        local->loc = GF_CALLOC(1, sizeof(*oldloc), gf_crypt_mt_loc);
+        if (!local->loc)
+                goto error;
+        if (loc_copy(local->loc, oldloc))
+                goto error;
+        if (newloc) {
+                local->newloc = GF_CALLOC(1, sizeof(*newloc), gf_crypt_mt_loc);
+                if (!local->newloc)
+                        goto error;
+                if (loc_copy(local->newloc, newloc))
+                        goto error;
+        }
+        local->xattr = dict_new();
+        if (!local->xattr) {
+                gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+                goto error;
+        }
+        return 0;
+ error:
+        /* wipe before free: loc_wipe() accesses the loc it is given */
+        if (local->newloc) {
+                loc_wipe(local->newloc);
+                GF_FREE(local->newloc);
+                local->newloc = NULL;
+        }
+        if (local->loc) {
+                loc_wipe(local->loc);
+                GF_FREE(local->loc);
+                local->loc = NULL;
+        }
+        if (local->fd) {
+                fd_unref(local->fd);
+                local->fd = NULL;
+        }
+        if (local->xdata) {
+                dict_unref(local->xdata);
+                local->xdata = NULL;
+        }
+        local->op_ret = -1;
+        local->op_errno = ret;
+
+        return ret;
+}
+
+/*
+ * read and verify locked metadata against the old pathname (via open);
+ * update the metadata string in accordance with the new pathname;
+ * submit modified metadata;
+ * wind;
+ *
+ * Common entry point of link, unlink and rename (@op selects which).
+ * @newloc is NULL for unlink. Failures are reported by unwinding
+ * through the handler selected by @op; the function always returns 0.
+ */
+static int32_t linkop(call_frame_t *frame,
+                      xlator_t *this,
+                      loc_t *oldloc,
+                      loc_t *newloc,
+                      int flags,
+                      dict_t *xdata,
+                      glusterfs_fop_t op)
+{
+        int32_t ret;
+        dict_t *dict;
+        crypt_local_t *local;
+        void (*unwind_fn)(call_frame_t *frame);
+
+        unwind_fn = linkop_unwind_dispatch(op);
+
+        ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
+        local = frame->local;
+        if (ret)
+                goto error;
+        dict = dict_new();
+        if (!dict) {
+                gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+                ret = ENOMEM;
+                goto error;
+        }
+        /*
+         * Set a message to crypt_open() that we need
+         * locked metadata string.
+         * All link operations (link, unlink, rename)
+         * need write lock
+         */
+        msgflags_set_mtd_wlock(&local->msgflags);
+        ret = dict_set_static_bin(dict,
+                                  MSGFLAGS_PREFIX,
+                                  &local->msgflags,
+                                  sizeof(local->msgflags));
+        if (ret) {
+                gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
+                dict_unref(dict);
+                /*
+                 * the dict API's failure value is not guaranteed to
+                 * be a valid (positive) errno
+                 */
+                ret = ENOMEM;
+                goto error;
+        }
+        /*
+         * verify metadata against the old pathname
+         * and retrieve locked metadata string
+         */
+        STACK_WIND(frame,
+                   linkop_begin,
+                   this,
+                   this->fops->open, /* crypt_open() */
+                   oldloc,
+                   O_RDWR,
+                   local->fd,
+                   dict);
+        dict_unref(dict);
+        return 0;
+ error:
+        /*
+         * local is NULL when it could not be allocated;
+         * rename_unwind() copes with a frame without local.
+         * NOTE(review): confirm the link and unlink handlers do too.
+         */
+        if (local) {
+                local->op_ret = -1;
+                local->op_errno = ret;
+        }
+        unwind_fn(frame);
+        return 0;
+}
+
+/* link FOP: handled by linkop() with GF_FOP_LINK and flags 0 */
+static int32_t crypt_link(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
+}
+
+/* unlink FOP: handled by linkop() with GF_FOP_UNLINK and no new location */
+static int32_t crypt_unlink(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
+{
+ return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
+}
+
+/* rename FOP: handled by linkop() with GF_FOP_RENAME and flags 0 */
+static int32_t crypt_rename(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
+}
+
+/*
+ * Drop one pending call of an open; when put_one_call() reports
+ * non-zero (presumably: this was the last pending call - confirm
+ * against put_one_call()), unwind the open with the result stored in
+ * local and release the fd reference, the xdata reference and the
+ * location copy held by local.
+ */
+static void put_one_call_open(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ /* cached, because they are released after the unwind */
+ fd_t *fd = local->fd;
+ loc_t *loc = local->loc;
+ dict_t *xdata = local->xdata;
+
+ STACK_UNWIND_STRICT(open,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ fd,
+ xdata);
+ fd_unref(fd);
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(loc);
+ GF_FREE(loc);
+ }
+}
+
+/*
+ * Final step of readv; called directly by crypt_readv_done() or as the
+ * callback of the finodelk(F_UNLCK) wound there.
+ *
+ * A failed unlock is logged and, if the read itself had succeeded,
+ * becomes the result of the read. The readv is unwound with
+ * local->rw_count when it is positive, otherwise with local->op_ret;
+ * afterwards the data vector, the fd reference, the xdata reference
+ * and both iobrefs are released.
+ *
+ * Always returns 0.
+ */
+static int32_t __crypt_readv_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /* everything released after the unwind is cached up front */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ /* read deals with data configs only */
+ struct iovec *avec = local->data_conf.avec;
+ char **pool = local->data_conf.pool;
+ int blocks_in_pool = local->data_conf.blocks_in_pool;
+ struct iobref *iobref = local->iobref;
+ struct iobref *iobref_data = local->iobref_data;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "readv unlock failed (%d)", op_errno);
+ /* do not overwrite an earlier read error */
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ dump_plain_text(local, avec);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
+ (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
+ (int)(local->rw_count > 0 ? iovec_get_size(avec, local->data_conf.acount) : 0),
+ (unsigned long long)local->buf.ia_size);
+
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ local->rw_count > 0 ? local->rw_count : local->op_ret,
+ local->op_errno,
+ avec,
+ avec ? local->data_conf.acount : 0,
+ &local->buf,
+ local->iobref,
+ local_xdata);
+
+ free_avec(avec, pool, blocks_in_pool);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobref)
+ iobref_unref(iobref);
+ if (iobref_data)
+ iobref_unref(iobref_data);
+ return 0;
+}
+
+/*
+ * Finish a readv. When the parent of this frame is the crypt xlator
+ * itself the file is left locked and the read is completed at once;
+ * otherwise the lock on local->fd is released first (finodelk with
+ * F_UNLCK in the domain this->name) and __crypt_readv_done() runs as
+ * its callback.
+ */
+static void crypt_readv_done(call_frame_t *frame, xlator_t *this)
+{
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_readv_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+}
+
+/*
+ * Drop one pending call of a readv and finish the read via
+ * crypt_readv_done() when put_one_call() reports non-zero
+ * (presumably: no calls are pending any more - confirm).
+ */
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local))
+ crypt_readv_done(frame, this);
+}
+
+/*
+ * Final step of writev; called directly by crypt_writev_done() or as
+ * the callback of the finodelk(F_UNLCK) wound there.
+ *
+ * Computes the byte count reported to the user from local->rw_count,
+ * releases the write resources and unwinds the writev.
+ * NOTE(review): unlike the readv/ftruncate counterparts, the result of
+ * the unlock (@op_ret/@op_errno) is neither logged nor propagated.
+ *
+ * Always returns 0.
+ */
+static int32_t __crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /* released after the unwind, so cached up front */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ int32_t ret_to_user;
+
+ if (local->xattr)
+ dict_unref(local->xattr);
+ /*
+ * Calculate the amount of bytes to be returned
+ * to user. We need to subtract paddings that
+ * have been written as a part of atom.
+ */
+ /*
+ * subtract head padding
+ */
+ if (local->rw_count == 0)
+ /*
+ * Nothing has been written, it must be an error
+ */
+ ret_to_user = local->op_ret;
+ else if (local->rw_count <= local->data_conf.off_in_head) {
+ gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
+ ret_to_user = 0;
+ }
+ else
+ ret_to_user = local->rw_count -
+ local->data_conf.off_in_head;
+ /*
+ * subtract tail padding
+ */
+ if (ret_to_user > local->data_conf.orig_size)
+ ret_to_user = local->data_conf.orig_size;
+
+ if (local->iobref)
+ iobref_unref(local->iobref);
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "writev: ret_to_user: %d", ret_to_user);
+
+ STACK_UNWIND_STRICT(writev,
+ frame,
+ ret_to_user,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+/*
+ * Finish a writev; called directly by put_one_call_writev() or as the
+ * callback of the fsetxattr wound there to update the file size.
+ *
+ * A failed size update is only logged. When the parent of this frame
+ * is the crypt xlator itself the file is left locked and the write is
+ * completed at once; otherwise the lock on local->fd is released first
+ * and __crypt_writev_done() runs as the callback of the unlock.
+ *
+ * Always returns 0.
+ */
+static int32_t crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+ return 0;
+}
+
+/*
+ * Drop one pending call of a writev. When put_one_call() reports
+ * non-zero (presumably: no calls are pending any more - confirm), the
+ * write is finished: if local->update_disk_file_size is set, the
+ * current file size is first stored on disk as the FSIZE_XATTR_PREFIX
+ * xattr of local->fd; crypt_writev_done() completes the write in all
+ * cases.
+ */
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ /* give up on the size update, but still finish the write */
+ crypt_writev_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* carries FSIZE_XATTR_PREFIX set above */
+ 0,
+ NULL);
+ }
+ else
+ crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+/*
+ * Final step of ftruncate; callback of the finodelk(F_UNLCK) wound by
+ * crypt_ftruncate_done().
+ *
+ * A failed unlock is logged and, if the truncate itself had succeeded,
+ * becomes its result. The ftruncate is unwound with 0 or -1, then the
+ * fd reference, the xdata reference and the buffer behind local->vec
+ * are released.
+ *
+ * Always returns 0.
+ */
+static int32_t __crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /* released after the unwind, so cached up front */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ char *iobase = local->vec.iov_base;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "ftruncate unlock failed (%d)", op_errno);
+ /* do not overwrite an earlier truncate error */
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "ftruncate, return to user: presize=%llu, postsize=%llu",
+ (unsigned long long)local->prebuf.ia_size,
+ (unsigned long long)local->postbuf.ia_size);
+
+ STACK_UNWIND_STRICT(ftruncate,
+ frame,
+ local->op_ret < 0 ? -1 : 0,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobase)
+ GF_FREE(iobase);
+ return 0;
+}
+
+/*
+ * Finish an ftruncate; called directly by put_one_call_ftruncate() or
+ * as the callback of the fsetxattr wound there to update the file size.
+ *
+ * Drops the reference on local->xattr, logs a failed size update and
+ * releases the lock on local->fd; __crypt_ftruncate_done() runs as the
+ * callback of the unlock.
+ *
+ * Always returns 0.
+ */
+static int32_t crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ dict_unref(local->xattr);
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+}
+
+/*
+ * Drop one pending call of an ftruncate. When put_one_call() reports
+ * non-zero (presumably: no calls are pending any more - confirm), the
+ * truncate is finished: if local->update_disk_file_size is set, the
+ * current file size is first stored on disk as the FSIZE_XATTR_PREFIX
+ * xattr of local->fd; crypt_ftruncate_done() completes the truncate in
+ * all cases.
+ */
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ /* give up on the size update, but still finish the truncate */
+ crypt_ftruncate_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* carries FSIZE_XATTR_PREFIX set above */
+ 0,
+ NULL);
+ }
+ else
+ crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+/*
+ * load regular file size for some FOPs
+ *
+ * Callback of the (f)getxattr that fetches FSIZE_XATTR_PREFIX. The
+ * value found in @dict replaces ia_size in local->buf, then the FOP
+ * recorded in local->fop (fstat, stat, lookup or readv) is unwound.
+ * A failed getxattr or a missing size xattr unwinds the FOP with an
+ * error (EIO for the missing xattr).
+ *
+ * The references held by local (fd, loc, xdata, inode) are released
+ * here; xdata and inode only after the unwind, because they are passed
+ * to it. Always returns 0.
+ */
+static int32_t load_file_size(call_frame_t *frame,
+                              void *cookie,
+                              xlator_t *this,
+                              int32_t op_ret,
+                              int32_t op_errno,
+                              dict_t *dict,
+                              dict_t *xdata)
+{
+        data_t *data;
+        crypt_local_t *local = frame->local;
+
+        /* released after the unwind, so cached up front */
+        dict_t *local_xdata = local->xdata;
+        inode_t *local_inode = local->inode;
+
+        if (op_ret < 0)
+                goto unwind;
+        /*
+         * load regular file size
+         */
+        data = dict_get(dict, FSIZE_XATTR_PREFIX);
+        if (!data) {
+                /*
+                 * local->xdata must not be released here: it is still
+                 * handed to the unwind below and is released exactly
+                 * once after it
+                 */
+                gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+                op_ret = -1;
+                op_errno = EIO;
+                goto unwind;
+        }
+        local->buf.ia_size = data_to_uint64(data);
+
+        gf_log(this->name, GF_LOG_DEBUG,
+               "FOP %d: Translate regular file to %llu",
+               local->fop,
+               (unsigned long long)local->buf.ia_size);
+ unwind:
+        if (local->fd)
+                fd_unref(local->fd);
+        if (local->loc) {
+                loc_wipe(local->loc);
+                GF_FREE(local->loc);
+        }
+        switch (local->fop) {
+        case GF_FOP_FSTAT:
+                STACK_UNWIND_STRICT(fstat,
+                                    frame,
+                                    op_ret,
+                                    op_errno,
+                                    op_ret >= 0 ? &local->buf : NULL,
+                                    local_xdata);
+                break;
+        case GF_FOP_STAT:
+                STACK_UNWIND_STRICT(stat,
+                                    frame,
+                                    op_ret,
+                                    op_errno,
+                                    op_ret >= 0 ? &local->buf : NULL,
+                                    local_xdata);
+                break;
+        case GF_FOP_LOOKUP:
+                STACK_UNWIND_STRICT(lookup,
+                                    frame,
+                                    op_ret,
+                                    op_errno,
+                                    op_ret >= 0 ? local_inode : NULL,
+                                    op_ret >= 0 ? &local->buf : NULL,
+                                    local_xdata,
+                                    op_ret >= 0 ? &local->postbuf : NULL);
+                break;
+        case GF_FOP_READ:
+                STACK_UNWIND_STRICT(readv,
+                                    frame,
+                                    op_ret,
+                                    op_errno,
+                                    NULL,
+                                    0,
+                                    op_ret >= 0 ? &local->buf : NULL,
+                                    NULL,
+                                    NULL);
+                break;
+        default:
+                gf_log(this->name, GF_LOG_WARNING,
+                       "Improper file operation %d", local->fop);
+        }
+        if (local_xdata)
+                dict_unref(local_xdata);
+        if (local_inode)
+                inode_unref(local_inode);
+        return 0;
+}
+
+/*
+ * Common callback of the stat and fstat wound by crypt_stat() and
+ * crypt_fstat().
+ *
+ * Errors and non-regular files are unwound to the caller as they are.
+ * For a regular file the returned iatt and xdata are saved in local
+ * and the FSIZE_XATTR_PREFIX xattr is requested; load_file_size()
+ * unwinds the FOP once the xattr has arrived.
+ *
+ * Always returns 0.
+ */
+static int32_t crypt_stat_common_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->buf = *buf;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ default:
+ /* NOTE(review): the frame is neither wound nor unwound on this path */
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+ unwind:
+ /* release what crypt_stat()/crypt_fstat() stored in local */
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+}
+
+/*
+ * fstat FOP: keep a reference on @fd in local and wind the fstat to
+ * the first child; crypt_stat_common_cbk() handles the reply.
+ * Unwinds with -1/ENOMEM if local can not be allocated.
+ * Always returns 0.
+ */
+static int32_t crypt_fstat(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
+ if (!local)
+ goto error;
+ local->fd = fd_ref(fd);
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * stat FOP: keep a private copy of @loc in local (it is needed later
+ * to fetch the file size xattr) and wind the stat to the first child;
+ * crypt_stat_common_cbk() handles the reply.
+ * Unwinds with -1/ENOMEM if any allocation fails.
+ * Always returns 0.
+ */
+static int32_t crypt_stat(call_frame_t *frame,
+                          xlator_t *this,
+                          loc_t *loc, dict_t *xdata)
+{
+        int32_t ret;
+        crypt_local_t *local;
+
+        local = crypt_alloc_local(frame, this, GF_FOP_STAT);
+        if (!local)
+                goto error;
+        /* GF_CALLOC returns zeroed memory, no extra memset is needed */
+        local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+        if (!local->loc)
+                goto error;
+        ret = loc_copy(local->loc, loc);
+        if (ret) {
+                GF_FREE(local->loc);
+                /* do not leave a dangling pointer behind in local */
+                local->loc = NULL;
+                goto error;
+        }
+        STACK_WIND(frame,
+                   crypt_stat_common_cbk,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->stat,
+                   loc,
+                   xdata);
+        return 0;
+ error:
+        STACK_UNWIND_STRICT(stat,
+                            frame,
+                            -1,
+                            ENOMEM,
+                            NULL,
+                            NULL);
+        return 0;
+}
+
+/*
+ * Callback of the lookup wound by crypt_lookup().
+ *
+ * Errors and non-regular files are unwound to the caller as they are
+ * (after releasing the location copy kept in local). For a regular
+ * file the reply is saved in local, the gfid of the copied location is
+ * set from the returned iatt, and the FSIZE_XATTR_PREFIX xattr is
+ * requested; load_file_size() unwinds the lookup.
+ *
+ * Always returns 0.
+ */
+static int32_t crypt_lookup_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->postbuf = *postparent;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ uuid_copy(local->loc->gfid, buf->ia_gfid);
+
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ unwind:
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
+ return 0;
+}
+
+/*
+ * lookup FOP: keep a private copy of @loc in local (it is needed later
+ * to fetch the file size xattr) and wind the lookup to the first
+ * child; crypt_lookup_cbk() handles the reply.
+ * Unwinds with -1/ENOMEM if any allocation fails.
+ * Always returns 0.
+ */
+static int32_t crypt_lookup(call_frame_t *frame,
+                            xlator_t *this,
+                            loc_t *loc, dict_t *xdata)
+{
+        int32_t ret;
+        crypt_local_t *local;
+
+        local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
+        if (!local)
+                goto error;
+        /* GF_CALLOC returns zeroed memory, no extra memset is needed */
+        local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+        if (!local->loc)
+                goto error;
+        ret = loc_copy(local->loc, loc);
+        if (ret) {
+                GF_FREE(local->loc);
+                /* do not leave a dangling pointer behind in local */
+                local->loc = NULL;
+                goto error;
+        }
+        gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
+        STACK_WIND(frame,
+                   crypt_lookup_cbk,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->lookup,
+                   loc,
+                   xdata);
+        return 0;
+ error:
+        STACK_UNWIND_STRICT(lookup,
+                            frame,
+                            -1,
+                            ENOMEM,
+                            NULL,
+                            NULL,
+                            NULL,
+                            NULL);
+        return 0;
+}
+
+/*
+ * for every regular directory entry find its real file size
+ * and update stat's buf properly
+ *
+ * Callback of the readdirp wound by crypt_readdirp(). The size is
+ * taken from the FSIZE_XATTR_PREFIX value in the entry's dict; the
+ * first regular entry without it turns the whole reply into -1/EIO.
+ * Always returns 0.
+ */
+static int32_t crypt_readdirp_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry (entry, (&entries->list), list) {
+ data_t *data;
+
+ /* only regular files get their size translated */
+ if (!IA_ISREG(entry->d_stat.ia_type))
+ continue;
+ data = dict_get(entry->dict, FSIZE_XATTR_PREFIX);
+ if (!data){
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size of direntry not found");
+ op_errno = EIO;
+ op_ret = -1;
+ break;
+ }
+ entry->d_stat.ia_size = data_to_uint64(data);
+ }
+ unwind:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+/*
+ * ->readdirp() fills in-core inodes, so we need to set proper
+ * file sizes for all directory entries of the parent @fd.
+ * Actual updates take place in ->crypt_readdirp_cbk()
+ *
+ * The FSIZE_XATTR_PREFIX key is added to @xdata (a new dict is created
+ * when the caller passed none), then the readdirp is wound to the
+ * first child. On failure the readdirp is unwound with -1/ENOMEM.
+ * Always returns 0.
+ */
+static int32_t crypt_readdirp(call_frame_t *frame, xlator_t *this,
+                              fd_t *fd, size_t size, off_t offset,
+                              dict_t *xdata)
+{
+        int32_t ret = ENOMEM;
+
+        if (!xdata) {
+                xdata = dict_new();
+                if (!xdata)
+                        goto error;
+        }
+        else
+                dict_ref(xdata);
+        /*
+         * make sure that we'll have real file sizes at ->readdirp_cbk()
+         */
+        if (dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0))) {
+                /*
+                 * the dict API's failure value is not guaranteed to
+                 * be a valid (positive) errno, so it is not unwound
+                 * as op_errno
+                 */
+                ret = ENOMEM;
+                dict_unref(xdata);
+                goto error;
+        }
+        STACK_WIND(frame,
+                   crypt_readdirp_cbk,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->readdirp,
+                   fd,
+                   size,
+                   offset,
+                   xdata);
+        /* drop the reference taken (or the dict created) above */
+        dict_unref(xdata);
+        return 0;
+ error:
+        STACK_UNWIND_STRICT(readdirp, frame, -1, ret, NULL, NULL);
+        return 0;
+}
+
+/*
+ * access FOP: not supported. Logs a warning and unwinds with
+ * -1/EPERM without contacting the child. Always returns 0.
+ */
+static int32_t crypt_access(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ gf_log(this->name, GF_LOG_WARNING,
+ "NFS mounts of encrypted volumes are unsupported");
+ STACK_UNWIND_STRICT(access, frame, -1, EPERM, NULL);
+ return 0;
+}
+
+/*
+ * Read the "block-size" option (from @options on reconfigure, from the
+ * xlator's own options when @options is NULL) and store its base-2
+ * logarithm in the master cipher info.
+ *
+ * Accepted values are 512, 1024, 2048 and 4096 (9..12 bits).
+ * Returns 0 on success, -1 on a bad option or an unsupported size.
+ */
+int32_t master_set_block_size (xlator_t *this, crypt_private_t *priv,
+                               dict_t *options)
+{
+        int bits;
+        uint64_t block_size = 0;
+        struct master_cipher_info *master = get_master_cinfo(priv);
+
+        if (options == NULL) {
+                GF_OPTION_INIT("block-size", block_size, size, error);
+        }
+        else {
+                GF_OPTION_RECONF("block-size", block_size, options,
+                                 size, error);
+        }
+
+        /* look for the power of two in [2^9, 2^12] equal to block_size */
+        for (bits = 9; bits <= 12; bits++) {
+                if (block_size == ((uint64_t)1 << bits)) {
+                        master->m_block_bits = bits;
+                        return 0;
+                }
+        }
+        gf_log("crypt", GF_LOG_ERROR,
+               "FATAL: unsupported block size %llu",
+               (unsigned long long)block_size);
+ error:
+        return -1;
+}
+
+/* Set the cipher algorithm of the master info to AES_CIPHER_ALG; always returns 0 */
+int32_t master_set_alg(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_alg = AES_CIPHER_ALG;
+ return 0;
+}
+
+/* Set the cipher mode of the master info to XTS_CIPHER_MODE; always returns 0 */
+int32_t master_set_mode(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_mode = XTS_CIPHER_MODE;
+ return 0;
+}
+
+/*
+ * set key size in bits to the master info
+ * Pre-conditions: cipher mode in the master info is uptodate.
+ *
+ * The "data-key-size" option is read from @options on reconfigure, or
+ * from the xlator's own options when @options is NULL, and validated
+ * by the check_key() hook of the configured algorithm/mode pair.
+ * Returns 0 on success, -1 on a bad option or a rejected key size.
+ */
+static int master_set_data_key_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ int32_t ret;
+ uint64_t key_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("data-key-size", key_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("data-key-size", key_size, size, error);
+
+ ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: wrong bin key size %llu for alg %d mode %d",
+ (unsigned long long)key_size,
+ (int)master->m_alg,
+ (int)master->m_mode);
+ goto error;
+ }
+ master->m_dkey_size = key_size;
+ return 0;
+ error:
+ return -1;
+}
+
+/*
+ * Return 1 if the first character of @s is a decimal digit or a
+ * lower-case hexadecimal letter ('a'..'f'), 0 otherwise.
+ * Upper-case letters are not accepted.
+ */
+static int is_hex(char *s) {
+        const char c = *s;
+
+        if (c >= '0' && c <= '9')
+                return 1;
+        return c >= 'a' && c <= 'f';
+}
+
+/* Value of one hex digit that has already been accepted by is_hex() */
+static int hex_nibble(char c)
+{
+        return (c <= '9') ? (c - '0') : (c - 'a' + 10);
+}
+
+/*
+ * Convert @hex_size hexadecimal characters at @src into hex_size / 2
+ * bytes at @dst (a trailing odd character is ignored).
+ *
+ * @src does not have to be NUL-terminated: exactly the characters in
+ * [src, src + hex_size) are examined, which is why the digits are
+ * converted by hand instead of with sscanf() (sscanf() requires a
+ * terminated string).
+ *
+ * Returns 0 on success, -1 if a character is rejected by is_hex();
+ * in that case @dst may be partially written.
+ */
+static int parse_hex_buf(xlator_t *this, char *src, unsigned char *dst,
+                         int hex_size)
+{
+        int i;
+
+        for (i = 0; i < (hex_size / 2); i++) {
+                char *pair = src + i*2;
+
+                if (!is_hex(pair) || !is_hex(pair + 1)) {
+                        gf_log("crypt", GF_LOG_ERROR,
+                               "FATAL: not hex symbol in key");
+                        return -1;
+                }
+                dst[i] = (unsigned char)((hex_nibble(pair[0]) << 4) |
+                                         hex_nibble(pair[1]));
+        }
+        return 0;
+}
+
+/*
+ * Parse options;
+ * install master volume key
+ *
+ * The "master-key" option names a file whose first
+ * 2 * MASTER_VOL_KEY_SIZE characters are the key in lower-case hex.
+ * The decoded key is copied to the master cipher info.
+ *
+ * Returns 0 on success, -1 on a missing option, an unreadable or too
+ * short file, or a malformed key. Both temporary key buffers are
+ * cleared on every path that may have filled them.
+ * NOTE(review): a plain memset() of a dying buffer may be optimized
+ * away; consider a guaranteed-zeroization primitive if one is
+ * available to this code base.
+ */
+int32_t master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+        int32_t ret;
+        FILE *file = NULL;
+
+        size_t key_size;
+        char *opt_key_file_pathname = NULL;
+
+        unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
+        char hex_buf[2 * MASTER_VOL_KEY_SIZE];
+
+        struct master_cipher_info *master = get_master_cinfo(priv);
+        /*
+         * extract master key passed via option
+         */
+        GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
+
+        if (!opt_key_file_pathname) {
+                gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
+                return -1;
+        }
+        gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
+               opt_key_file_pathname);
+
+        file = fopen(opt_key_file_pathname, "r");
+        if (file == NULL) {
+                gf_log(this->name, GF_LOG_ERROR,
+                       "FATAL: can not open file with master key");
+                return -1;
+        }
+        /*
+         * extract hex key
+         */
+        key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
+        if (key_size < sizeof(hex_buf)) {
+                gf_log(this->name, GF_LOG_ERROR,
+                       "FATAL: master key is too short");
+                goto bad_key;
+        }
+        /* key_size == sizeof(hex_buf) here, so it fits into an int */
+        ret = parse_hex_buf(this, hex_buf, bin_buf, (int)key_size);
+        if (ret)
+                goto bad_key;
+        memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
+        memset(hex_buf, 0, sizeof(hex_buf));
+        fclose(file);
+
+        memset(bin_buf, 0, sizeof(bin_buf));
+        return 0;
+ bad_key:
+        gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
+        if (file)
+                fclose(file);
+        /* key material may already be in either buffer */
+        memset(hex_buf, 0, sizeof(hex_buf));
+        memset(bin_buf, 0, sizeof(bin_buf));
+        return -1;
+}
+
+/*
+ * Derive volume key for object-id authentication
+ *
+ * Thin wrapper: returns whatever get_nmtd_vol_key() returns for the
+ * master cipher info of @priv.
+ */
+int32_t master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ return get_nmtd_vol_key(get_master_cinfo(priv));
+}
+
+/*
+ * Fill the master cipher info of the xlator: algorithm, mode, block
+ * size, data key size, master volume key and the derived volume key,
+ * in this order. Stops at the first step that fails and returns its
+ * result; returns 0 when all steps succeed.
+ */
+int32_t crypt_init_xlator(xlator_t *this)
+{
+        crypt_private_t *priv = this->private;
+        int32_t ret = master_set_alg(this, priv);
+
+        /* every later step runs only while all earlier ones succeeded */
+        if (!ret)
+                ret = master_set_mode(this, priv);
+        if (!ret)
+                ret = master_set_block_size(this, priv, NULL);
+        if (!ret)
+                ret = master_set_data_key_size(this, priv, NULL);
+        if (!ret)
+                ret = master_set_master_vol_key(this, priv);
+        if (!ret)
+                ret = master_set_nmtd_vol_key(this, priv);
+        return ret;
+}
+
+/*
+ * Allocate the zero-filled private data of the xlator and store it in
+ * this->private. Returns 0 on success, ENOMEM (with this->private set
+ * to NULL) if the allocation fails.
+ */
+static int32_t crypt_alloc_private(xlator_t *this)
+{
+        this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
+        if (this->private)
+                return 0;
+
+        gf_log("crypt", GF_LOG_ERROR,
+               "Can not allocate memory for private data");
+        return ENOMEM;
+}
+
+/*
+ * Clear and release the private data of the xlator (it holds key
+ * material). this->private is reset to NULL, so calling this function
+ * again - e.g. fini() after a failed init() - is harmless.
+ */
+static void crypt_free_private(xlator_t *this)
+{
+        crypt_private_t *priv = this->private;
+        if (priv) {
+                memset(priv, 0, sizeof(*priv));
+                GF_FREE(priv);
+                this->private = NULL;
+        }
+}
+
+/*
+ * Apply changed volume options: re-read "block-size" and
+ * "data-key-size" from @options.
+ *
+ * Returns 0 on success, -1 if an argument is missing or an option is
+ * rejected.
+ * NOTE(review): the two options are applied one after the other; if
+ * the key size is rejected, a new block size has already been stored.
+ */
+int32_t reconfigure (xlator_t *this, dict_t *options)
+{
+        int32_t ret = -1;
+        crypt_private_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("crypt", this, error);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, error);
+        GF_VALIDATE_OR_GOTO (this->name, options, error);
+
+        priv = this->private;
+
+        ret = master_set_block_size(this, priv, options);
+        if (ret) {
+                /* log under the xlator's name, not the literal "this->name" */
+                gf_log(this->name, GF_LOG_ERROR,
+                       "Failed to reconfure block size");
+                goto error;
+        }
+        ret = master_set_data_key_size(this, priv, options);
+        if (ret) {
+                gf_log(this->name, GF_LOG_ERROR,
+                       "Failed to reconfure data key size");
+                goto error;
+        }
+        return 0;
+ error:
+        return ret;
+}
+
+/*
+ * Xlator entry point: check that there is exactly one child, allocate
+ * the private data, set up the master cipher info and create the
+ * memory pool for crypt_local_t.
+ *
+ * Returns 0 on success and a non-zero value on failure (EINVAL for a
+ * wrong number of children, ENOMEM for a failed allocation, otherwise
+ * the result of crypt_init_xlator()). The private data is released
+ * again on failure.
+ */
+int32_t init(xlator_t *this)
+{
+ int32_t ret;
+
+ if (!this->children || this->children->next) {
+ gf_log ("crypt", GF_LOG_ERROR,
+ "FATAL: crypt should have exactly one child");
+ return EINVAL;
+ }
+ /* a missing parent is only worth a warning */
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+ ret = crypt_alloc_private(this);
+ if (ret)
+ return ret;
+ ret = crypt_init_xlator(this);
+ if (ret)
+ goto error;
+ this->local_pool = mem_pool_new(crypt_local_t, 64);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ ret = ENOMEM;
+ goto error;
+ }
+ gf_log ("crypt", GF_LOG_INFO, "crypt xlator loaded");
+ return 0;
+ error:
+ crypt_free_private(this);
+ return ret;
+}
+
+/*
+ * Xlator exit point: release the private data.
+ * NOTE(review): this->local_pool created in init() is not released
+ * here - confirm whether the framework takes care of it.
+ */
+void fini (xlator_t *this)
+{
+ crypt_free_private(this);
+}
+
+/* File operations implemented by the crypt xlator */
+struct xlator_fops fops = {
+ .readv = crypt_readv,
+ .writev = crypt_writev,
+ .truncate = crypt_truncate,
+ .ftruncate = crypt_ftruncate,
+ .setxattr = crypt_setxattr,
+ .fsetxattr = crypt_fsetxattr,
+ .link = crypt_link,
+ .unlink = crypt_unlink,
+ .rename = crypt_rename,
+ .open = crypt_open,
+ .create = crypt_create,
+ .stat = crypt_stat,
+ .fstat = crypt_fstat,
+ .lookup = crypt_lookup,
+ .readdirp = crypt_readdirp,
+ .access = crypt_access
+};
+
+/* Callbacks of the crypt xlator: only forget is handled */
+struct xlator_cbks cbks = {
+ .forget = crypt_forget
+};
+
+/*
+ * Volume options of the crypt xlator.
+ *
+ * "block-size" is not a number of bits: master_set_block_size()
+ * accepts 512, 1024, 2048 and 4096 and stores log2 of the value (9..12)
+ * as the number of block bits, so the description states bytes.
+ */
+struct volume_options options[] = {
+        { .key  = {"master-key"},
+          .type = GF_OPTION_TYPE_PATH,
+          .description = "Pathname of regular file which contains master volume key"
+        },
+        { .key  = {"data-key-size"},
+          .type = GF_OPTION_TYPE_SIZET,
+          .description = "Data key size (bits)",
+          .min  = 256,
+          .max  = 512,
+          .default_value = "256",
+        },
+        { .key  = {"block-size"},
+          .type = GF_OPTION_TYPE_SIZET,
+          .description = "Atom size (bytes)",
+          .min  = 512,
+          .max  = 4096,
+          .default_value = "4096"
+        },
+        { .key  = {NULL} },
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
new file mode 100644
index 000000000..01a8542ab
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.h
@@ -0,0 +1,899 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_H__
+#define __CRYPT_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+#include <openssl/sha.h>
+#include <openssl/hmac.h>
+#include <openssl/cmac.h>
+#include <openssl/modes.h>
+#include "crypt-mem-types.h"
+
+#define CRYPT_XLATOR_ID (0)
+
+#define MAX_IOVEC_BITS (3)
+#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
+#define KEY_FACTOR_BITS (6)
+
+#define DEBUG_CRYPT (0)
+#define TRIVIAL_TFM (0)
+
+#define CRYPT_MIN_BLOCK_BITS (9)
+#define CRYPT_MAX_BLOCK_BITS (12)
+
+#define MASTER_VOL_KEY_SIZE (32)
+#define NMTD_VOL_KEY_SIZE (16)
+
+/*
+ * Descriptor of one key type; the instances live in crypt_keys[].
+ */
+struct crypt_key {
+ uint32_t len; /* presumably in bits: master_key_size() and
+                * nmtd_vol_key_size() return len >> 3 -- TODO confirm */
+ const char *label;
+};
+
+/*
+ * Add new key types to the end of this
+ * enumeration but before LAST_KEY_TYPE
+ *
+ * The values are used as indexes into crypt_keys[], which is sized
+ * by LAST_KEY_TYPE.
+ */
+typedef enum {
+ MASTER_VOL_KEY,
+ NMTD_VOL_KEY,
+ NMTD_LINK_KEY,
+ EMTD_FILE_KEY,
+ DATA_FILE_KEY_256,
+ DATA_FILE_KEY_512,
+ LAST_KEY_TYPE
+}crypt_key_type;
+
+/*
+ * Inputs and output buffer of a key derivation: a child key is
+ * derived from a parent key and "fixed input data" (see the per-field
+ * notes; note that key sizes are in bits while buffer lengths are in
+ * bytes).
+ */
+struct kderive_context {
+ const unsigned char *pkey;/* parent key */
+ uint32_t pkey_len; /* parent key size, bits */
+ uint32_t ckey_len; /* child key size, bits */
+ unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
+ uint32_t fid_len; /* fid len, bytes */
+ unsigned char *out; /* contains child keying material */
+ uint32_t out_len; /* out len, bytes */
+};
+
+/*
+ * Kind of setup an atom belongs to. conf_by_type() maps DATA_ATOM to
+ * the data config and HOLE_ATOM to the hole config of crypt_local_t.
+ */
+typedef enum {
+ DATA_ATOM,
+ HOLE_ATOM,
+ LAST_DATA_TYPE
+}atom_data_type;
+
+/*
+ * Locality of an atom inside an aligned vector: head, tail or full
+ * (see has_head_block(), has_tail_block() and has_full_blocks()).
+ */
+typedef enum {
+ HEAD_ATOM,
+ TAIL_ATOM,
+ FULL_ATOM,
+ LAST_LOCALITY_TYPE
+}atom_locality_type;
+
+/*
+ * Metadata operations. linkop_mtdop_dispatch() maps link to
+ * MTD_APPEND, unlink to MTD_CUT and rename to MTD_OVERWRITE, and
+ * returns MTD_LAST_OP for any other fop.
+ */
+typedef enum {
+ MTD_CREATE,
+ MTD_APPEND,
+ MTD_OVERWRITE,
+ MTD_CUT,
+ MTD_LAST_OP
+} mtd_op_t;
+
+/*
+ * Two (key, 128-bit block function) pairs.
+ * NOTE(review): presumably this is the layout behind XTS128_CONTEXT;
+ * encrypt_aes_xts() in data.c puts the data key into key1/block1 and
+ * the tweak key into key2/block2 -- confirm against OpenSSL.
+ */
+struct xts128_context {
+ void *key1, *key2;
+ block128_f block1,block2;
+};
+
+/*
+ * Per-object (per-file) cipher attributes together with the
+ * algorithm/mode specific key schedules.
+ */
+struct object_cipher_info {
+ cipher_alg_t o_alg;
+ cipher_mode_t o_mode;
+ uint32_t o_block_bits; /* atom size is 1 << o_block_bits,
+                          see get_atom_size() */
+ uint32_t o_dkey_size; /* raw data key size in bits */
+ union {
+ struct {
+ unsigned char ivec[16];
+ AES_KEY dkey[2]; /* indexed by AES_ENCRYPT / AES_DECRYPT */
+ AES_KEY tkey; /* key used for tweaking */
+ XTS128_CONTEXT xts;
+ } aes_xts;
+ } u;
+};
+
+/*
+ * Volume-wide cipher attributes and key material; embedded in
+ * crypt_private_t.
+ */
+struct master_cipher_info {
+ /*
+ * attributes inherited by newly created regular files
+ */
+ cipher_alg_t m_alg;
+ cipher_mode_t m_mode;
+ uint32_t m_block_bits;
+ uint32_t m_dkey_size; /* raw key size in bits */
+ /*
+ * master key
+ */
+ unsigned char m_key[MASTER_VOL_KEY_SIZE];
+ /*
+ * volume key for oid authentication
+ */
+ unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
+};
+
+/*
+ * This info is not changed during the file's life
+ */
+struct crypt_inode_info {
+#if DEBUG_CRYPT
+ loc_t *loc; /* pathname that the file has been
+ opened, or created with */
+#endif
+ uint16_t nr_minor;
+ uuid_t oid;
+ struct object_cipher_info cinfo; /* returned by get_object_cinfo() */
+};
+
+/*
+ * Private data of the translator.
+ * This should be located in secure memory.
+ */
+typedef struct {
+ struct master_cipher_info master;
+} crypt_private_t;
+
+/* Return the master cipher info embedded in @priv. */
+static inline struct master_cipher_info *get_master_cinfo(crypt_private_t *priv)
+{
+        struct master_cipher_info *master = &priv->master;
+
+        return master;
+}
+
+/* Return the object cipher info embedded in @info. */
+static inline struct object_cipher_info *get_object_cinfo(struct crypt_inode_info
+                                                          *info)
+{
+        struct object_cipher_info *object = &info->cinfo;
+
+        return object;
+}
+
+/*
+ * this describes layouts and properties
+ * of atoms in an aligned vector
+ */
+struct avec_config {
+ uint32_t atom_size;
+ atom_data_type type;
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head; /* non-zero means there is a head block,
+                         * see has_head_block() */
+ uint32_t off_in_tail; /* see has_head_block()/has_tail_block() */
+ uint32_t gap_in_tail;
+ uint32_t nr_full_blocks;
+
+ struct iovec *avec; /* aligned vector */
+ uint32_t acount; /* number of avec components. The same
+ * as number of occupied logical blocks */
+ char **pool;
+ uint32_t blocks_in_pool;
+ uint32_t cursor; /* makes sense only for ordered writes,
+ * so there are no races on this counter.
+ *
+ * Cursor is per-config object, we don't
+ * reset cursor for atoms of different
+ * localities (head, tail, full)
+ */
+};
+
+
+/*
+ * Per-FOP state of the crypt translator; reached through frame->local
+ * (see get_data_conf() and get_hole_conf()).
+ */
+typedef struct {
+ glusterfs_fop_t fop; /* code of FOP this local info built for */
+ fd_t *fd;
+ inode_t *inode;
+ loc_t *loc;
+ int32_t mac_idx;
+ loc_t *newloc;
+ int32_t flags;
+ int32_t wbflags;
+ struct crypt_inode_info *info;
+ struct iobref *iobref;
+ struct iobref *iobref_data;
+ off_t offset;
+
+ uint64_t old_file_size; /* per FOP, retrieved under lock held */
+ uint64_t cur_file_size; /* per iteration, before issuing IOs */
+ uint64_t new_file_size; /* per iteration, after issuing IOs */
+
+ uint64_t io_offset; /* offset of IOs issued per iteration */
+ uint64_t io_offset_nopad; /* offset of user's data in the atom */
+ uint32_t io_size; /* size of IOs issued per iteration */
+ uint32_t io_size_nopad; /* size of user's data in the IOs */
+ uint32_t eof_padding_size; /* size of EOF padding in the IOs */
+
+ gf_lock_t call_lock; /* protect nr_calls from many cbks */
+ int32_t nr_calls;
+
+ atom_data_type active_setup; /* which setup (hole or data)
+ is currently active */
+ /* data setup */
+ struct avec_config data_conf;
+
+ /* hole setup */
+ int hole_conv_in_proggress;
+ gf_lock_t hole_lock; /* protect hole config from many cbks */
+ int hole_handled;
+ struct avec_config hole_conf;
+ struct iatt buf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t rw_count; /* total read or written */
+ gf_lock_t rw_count_lock; /* protect the counter above */
+ unsigned char *format; /* for create, update format string */
+ uint32_t format_size;
+ uint32_t msgflags; /* messages for crypt_open() */
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iovec vec; /* contains last file's atom for
+ read-prune-write sequence */
+ gf_boolean_t custom_mtd;
+ /*
+ * the next 3 fields are used by readdir and friends
+ */
+ gf_dirent_t *de; /* directory entry */
+ char *de_path; /* pathname of directory entry */
+ uint32_t de_prefix_len; /* length of the parent's pathname */
+ gf_dirent_t *entries;
+
+ uint32_t update_disk_file_size:1;
+} crypt_local_t;
+
+/*
+ * This represents a (read)modify-write atom: a table of methods
+ * specific to the locality of the atom.
+ */
+struct rmw_atom {
+ atom_locality_type locality;
+ /*
+ * read-modify-write sequence of the atom
+ */
+ int32_t (*rmw)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata);
+ /*
+ * offset of the logical block in a file
+ */
+ loff_t (*offset_at)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * IO offset in an atom
+ */
+ uint32_t (*offset_in)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * number of bytes of plain text of this atom that user
+ * wants to read/write.
+ * It can be smaller than atom_size in the case of head
+ * or tail atoms.
+ */
+ uint32_t (*io_size_nopad)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * which iovec represents the atom
+ */
+ struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
+ /*
+ * how many bytes of a partial block should be brought up to
+ * date by reading from disk.
+ * This is used to perform a read component of RMW (read-modify-write).
+ */
+ uint32_t (*count_to_uptodate)(call_frame_t *frame, struct object_cipher_info *object);
+ /*
+ * which avec_config (data or hole) the atom belongs to
+ * (presumably get_data_conf() or get_hole_conf() -- TODO confirm)
+ */
+ struct avec_config *(*get_config)(call_frame_t *frame);
+};
+
+/*
+ * Description of one (algorithm, mode) pair; the instances live in
+ * data_cipher_algs[][] and are looked up by the alg_mode_*() helpers.
+ */
+struct data_cipher_alg {
+ gf_boolean_t atomic; /* true means that algorithm requires
+ to pad data before cipher transform */
+ gf_boolean_t should_pad; /* true means that algorithm requires
+ to pad the end of file with extra-data */
+ uint32_t blkbits; /* blksize = 1 << blkbits */
+ /*
+ * any preliminary sanity checks go here
+ */
+ int32_t (*init)(void);
+ /*
+ * set alg-mode specific inode info
+ */
+ int32_t (*set_private)(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * check alg-mode specific data key
+ */
+ int32_t (*check_key)(uint32_t key_size);
+ /*
+ * set up the IV for a chunk at file @offset; cipher_data()
+ * calls this right before ->encrypt()
+ */
+ void (*set_iv)(off_t offset, struct object_cipher_info *object);
+ /*
+ * transform @length bytes from @from to @to; non-zero @enc
+ * means encrypt, zero means decrypt
+ */
+ int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
+ size_t length, off_t offset, const int enc,
+ struct object_cipher_info *object);
+};
+
+/*
+ * version-dependent metadata loader
+ * (the instances live in mtd_loaders[])
+ */
+struct crypt_mtd_loader {
+ /*
+ * return core format size
+ */
+ size_t (*format_size)(mtd_op_t op, size_t old_size);
+ /*
+ * pack version-specific metadata of an object
+ * at ->create()
+ */
+ int32_t (*create_format)(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * extract version-specific metadata of an object
+ * at ->open() time
+ */
+ int32_t (*open_format)(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info);
+ /*
+ * NOTE(review): presumably builds the @new format string from
+ * @old (of @old_len bytes) according to the metadata
+ * operation @op -- confirm against metadata.c
+ */
+ int32_t (*update_format)(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+};
+
+/*
+ * Handler types.
+ * end_writeback_handler_t   - returned by dispatch_end_writeback();
+ * linkop_wind_handler_t     - returned by linkop_wind_dispatch();
+ * linkop_unwind_handler_t   - returned by linkop_unwind_dispatch().
+ */
+typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata);
+typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
+typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
+
+
+/* Declarations */
+
+/* keys.c */
+extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
+int32_t get_nmtd_vol_key(struct master_cipher_info *master);
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key);
+/* data.c */
+extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE];
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf);
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count);
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop);
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype);
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t setup_gap_in_tail);
+
+/* metadata.c */
+extern struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER];
+
+int32_t alloc_format(crypt_local_t *local, size_t size);
+int32_t alloc_format_create(crypt_local_t *local);
+void free_format(crypt_local_t *local);
+size_t format_size(mtd_op_t op, size_t old_size);
+size_t new_format_size(void);
+int32_t open_format(unsigned char *str, int32_t len, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master, crypt_local_t *local,
+ gf_boolean_t load_info);
+int32_t update_format(unsigned char *new, unsigned char *old,
+ size_t old_len, int32_t mac_idx, mtd_op_t op, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+
+/* atom.c */
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality);
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype);
+void submit_full(call_frame_t *frame, xlator_t *this);
+
+/* crypt.c */
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop);
+static size_t iovec_get_size(struct iovec *vec, uint32_t count);
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom, off_t io_offset,
+ uint32_t io_size);
+void link_wind(call_frame_t *frame, xlator_t *this);
+void unlink_wind(call_frame_t *frame, xlator_t *this);
+void link_unwind(call_frame_t *frame);
+void unlink_unwind(call_frame_t *frame);
+void rename_wind(call_frame_t *frame, xlator_t *this);
+void rename_unwind(call_frame_t *frame);
+
+/* Inline functions */
+
+/*
+ * Return the total number of bytes described by the first @count
+ * components of @vec (0 when @count is 0).
+ */
+static inline size_t iovec_get_size(struct iovec *vec, uint32_t count)
+{
+        uint32_t i;
+        size_t size = 0;
+
+        /*
+         * The index has the type of @count: a signed index would be
+         * compared as unsigned anyway and would overflow for counts
+         * above INT_MAX.
+         */
+        for (i = 0; i < count; i++)
+                size += vec[i].iov_len;
+        return size;
+}
+
+/* Return the identifier of the crypt translator (CRYPT_XLATOR_ID). */
+static inline int32_t crypt_xlator_id(void)
+{
+        const int32_t id = CRYPT_XLATOR_ID;
+
+        return id;
+}
+
+/* Return the id of the metadata loader in use (MTD_LOADER_V1). */
+static inline mtd_loader_id current_mtd_loader(void)
+{
+        const mtd_loader_id loader = MTD_LOADER_V1;
+
+        return loader;
+}
+
+/* Return crypt_keys[MASTER_VOL_KEY].len divided by 8. */
+static inline uint32_t master_key_size (void)
+{
+        const struct crypt_key *key = &crypt_keys[MASTER_VOL_KEY];
+
+        return key->len >> 3;
+}
+
+/* Return crypt_keys[NMTD_VOL_KEY].len divided by 8. */
+static inline uint32_t nmtd_vol_key_size (void)
+{
+        const struct crypt_key *key = &crypt_keys[NMTD_VOL_KEY];
+
+        return key->len >> 3;
+}
+
+/* Return ->blkbits of the (@alg, @mode) entry of data_cipher_algs. */
+static inline uint32_t alg_mode_blkbits(cipher_alg_t alg,
+                                        cipher_mode_t mode)
+{
+        const struct data_cipher_alg *desc = &data_cipher_algs[alg][mode];
+
+        return desc->blkbits;
+}
+
+/* Return the cipher block size of (@alg, @mode): 1 << blkbits. */
+static inline uint32_t alg_mode_blksize(cipher_alg_t alg,
+                                        cipher_mode_t mode)
+{
+        uint32_t bits = alg_mode_blkbits(alg, mode);
+
+        return 1 << bits;
+}
+
+/* Return ->atomic of the (@alg, @mode) entry of data_cipher_algs. */
+static inline gf_boolean_t alg_mode_atomic(cipher_alg_t alg,
+                                           cipher_mode_t mode)
+{
+        const struct data_cipher_alg *desc = &data_cipher_algs[alg][mode];
+
+        return desc->atomic;
+}
+
+/* Return ->should_pad of the (@alg, @mode) entry of data_cipher_algs. */
+static inline gf_boolean_t alg_mode_should_pad(cipher_alg_t alg,
+                                               cipher_mode_t mode)
+{
+        const struct data_cipher_alg *desc = &data_cipher_algs[alg][mode];
+
+        return desc->should_pad;
+}
+
+/* Cipher block size of the algorithm/mode configured in @mr. */
+static inline uint32_t master_alg_blksize(struct master_cipher_info *mr)
+{
+        cipher_alg_t alg = mr->m_alg;
+        cipher_mode_t mode = mr->m_mode;
+
+        return alg_mode_blksize(alg, mode);
+}
+
+/* Cipher block bits of the algorithm/mode configured in @mr. */
+static inline uint32_t master_alg_blkbits(struct master_cipher_info *mr)
+{
+        cipher_alg_t alg = mr->m_alg;
+        cipher_mode_t mode = mr->m_mode;
+
+        return alg_mode_blkbits(alg, mode);
+}
+
+/* "atomic" property of the algorithm/mode configured in @mr. */
+static inline gf_boolean_t master_alg_atomic(struct master_cipher_info *mr)
+{
+        cipher_alg_t alg = mr->m_alg;
+        cipher_mode_t mode = mr->m_mode;
+
+        return alg_mode_atomic(alg, mode);
+}
+
+/* "should_pad" property of the algorithm/mode configured in @mr. */
+static inline gf_boolean_t master_alg_should_pad(struct master_cipher_info *mr)
+{
+        cipher_alg_t alg = mr->m_alg;
+        cipher_mode_t mode = mr->m_mode;
+
+        return alg_mode_should_pad(alg, mode);
+}
+
+/* Cipher block size of the algorithm/mode of the object @ob. */
+static inline uint32_t object_alg_blksize(struct object_cipher_info *ob)
+{
+        cipher_alg_t alg = ob->o_alg;
+        cipher_mode_t mode = ob->o_mode;
+
+        return alg_mode_blksize(alg, mode);
+}
+
+/* Cipher block bits of the algorithm/mode of the object @ob. */
+static inline uint32_t object_alg_blkbits(struct object_cipher_info *ob)
+{
+        cipher_alg_t alg = ob->o_alg;
+        cipher_mode_t mode = ob->o_mode;
+
+        return alg_mode_blkbits(alg, mode);
+}
+
+/* "atomic" property of the algorithm/mode of the object @ob. */
+static inline gf_boolean_t object_alg_atomic(struct object_cipher_info *ob)
+{
+        cipher_alg_t alg = ob->o_alg;
+        cipher_mode_t mode = ob->o_mode;
+
+        return alg_mode_atomic(alg, mode);
+}
+
+/* "should_pad" property of the algorithm/mode of the object @ob. */
+static inline gf_boolean_t object_alg_should_pad(struct object_cipher_info *ob)
+{
+        cipher_alg_t alg = ob->o_alg;
+        cipher_mode_t mode = ob->o_mode;
+
+        return alg_mode_should_pad(alg, mode);
+}
+
+/* Return the data key size of @master (kept in bits) divided by 8. */
+static inline uint32_t aes_raw_key_size(struct master_cipher_info *master)
+{
+        uint32_t bits = master->m_dkey_size;
+
+        return bits >> 3;
+}
+
+/* Return the hole config of the crypt_local_t attached to @frame. */
+static inline struct avec_config *get_hole_conf(call_frame_t *frame)
+{
+        crypt_local_t *local = (crypt_local_t *)frame->local;
+
+        return &local->hole_conf;
+}
+
+/* Return the data config of the crypt_local_t attached to @frame. */
+static inline struct avec_config *get_data_conf(call_frame_t *frame)
+{
+        crypt_local_t *local = (crypt_local_t *)frame->local;
+
+        return &local->data_conf;
+}
+
+/* Return the number of block bits (o_block_bits) of @object. */
+static inline int32_t get_atom_bits (struct object_cipher_info *object)
+{
+        int32_t bits = object->o_block_bits;
+
+        return bits;
+}
+
+/* Return the atom size of @object: 1 << get_atom_bits(). */
+static inline int32_t get_atom_size (struct object_cipher_info *object)
+{
+        int32_t bits = get_atom_bits(object);
+
+        return 1 << bits;
+}
+
+/*
+ * Return 1 if @conf has a head block: either off_in_head is set, or
+ * the only component of the vector has off_in_tail set. Otherwise 0.
+ */
+static inline int32_t has_head_block(struct avec_config *conf)
+{
+        if (conf->off_in_head)
+                return 1;
+        if (conf->acount != 1)
+                return 0;
+        return conf->off_in_tail ? 1 : 0;
+}
+
+/*
+ * Return 1 if @conf has a tail block: off_in_tail is set and the
+ * vector has more than one component. Otherwise 0.
+ */
+static inline int32_t has_tail_block(struct avec_config *conf)
+{
+        if (!conf->off_in_tail)
+                return 0;
+        return conf->acount > 1 ? 1 : 0;
+}
+
+/*
+ * Return the number of full blocks of @conf (so the result is
+ * non-zero iff there are any).
+ */
+static inline int32_t has_full_blocks(struct avec_config *conf)
+{
+        int32_t nr_full = conf->nr_full_blocks;
+
+        return nr_full;
+}
+
+/* Return 1 if @conf has a head block and the cursor is at 0. */
+static inline int32_t should_submit_head_block(struct avec_config *conf)
+{
+        if (!has_head_block(conf))
+                return 0;
+        return conf->cursor == 0;
+}
+
+/*
+ * Return 1 if @conf has a tail block and the cursor is at the last
+ * component of the vector.
+ */
+static inline int32_t should_submit_tail_block(struct avec_config *conf)
+{
+        if (!has_tail_block(conf))
+                return 0;
+        return conf->cursor == conf->acount - 1;
+}
+
+/*
+ * Return 1 if @conf has full blocks and the cursor points to one of
+ * them. Full blocks start at index 1 when there is a head block and
+ * at index 0 otherwise.
+ */
+static inline int32_t should_submit_full_block(struct avec_config *conf)
+{
+        uint32_t first = has_head_block(conf) ? 1 : 0;
+        uint32_t end = first + conf->nr_full_blocks;
+
+        if (!has_full_blocks(conf))
+                return 0;
+        if (conf->cursor < first)
+                return 0;
+        return conf->cursor < end;
+}
+
+#if DEBUG_CRYPT
+/*
+ * Debug check: log if @len is not a multiple of the cipher block
+ * size although the algorithm of @object requires padding.
+ */
+static inline void crypt_check_input_len(size_t len,
+                                         struct object_cipher_info *object)
+{
+        if (!object_alg_should_pad(object))
+                return;
+        if (len & (object_alg_blksize(object) - 1))
+                gf_log ("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len);
+}
+
+/* Debug check: log if @conf has no head block. */
+static inline void check_head_block(struct avec_config *conf)
+{
+        if (has_head_block(conf))
+                return;
+        gf_log("crypt", GF_LOG_DEBUG, "not a head atom");
+}
+
+/* Debug check: log if @conf has no tail block. */
+static inline void check_tail_block(struct avec_config *conf)
+{
+        if (has_tail_block(conf))
+                return;
+        gf_log("crypt", GF_LOG_DEBUG, "not a tail atom");
+}
+
+/* Debug check: log if @conf has no full blocks. */
+static inline void check_full_block(struct avec_config *conf)
+{
+        if (has_full_blocks(conf))
+                return;
+        gf_log("crypt", GF_LOG_DEBUG, "not a full atom");
+}
+
+/*
+ * Debug check for head atom methods: log if @conf has no head block,
+ * or if it has one but the cursor is not at 0.
+ */
+static inline void check_cursor_head(struct avec_config *conf)
+{
+        if (!has_head_block(conf)) {
+                gf_log("crypt",
+                       GF_LOG_DEBUG, "Illegal call of head atom method");
+                return;
+        }
+        if (conf->cursor == 0)
+                return;
+        gf_log("crypt",
+               GF_LOG_DEBUG, "Cursor (%d) is not at head atom",
+               conf->cursor);
+}
+
+/*
+ * Debug check for full atom methods. The two conditions are checked
+ * independently: @conf has no full blocks; the cursor still points
+ * to the head block.
+ */
+static inline void check_cursor_full(struct avec_config *conf)
+{
+        int at_head = has_head_block(conf) && (conf->cursor == 0);
+
+        if (!has_full_blocks(conf))
+                gf_log("crypt",
+                       GF_LOG_DEBUG, "Illegal call of full atom method");
+        if (at_head)
+                gf_log("crypt",
+                       GF_LOG_DEBUG, "Cursor is not at full atom");
+}
+
+/*
+ * Placeholder for a consistency check of @local: always returns 0.
+ *
+ * FIXME: use avec->iov_len to check setup
+ */
+static inline int data_local_invariant(crypt_local_t *local)
+{
+        (void)local;
+
+        return 0;
+}
+
+#else
+/*
+ * Stand-ins for the debug checks when DEBUG_CRYPT is 0: every check
+ * expands to "noop".
+ * NOTE(review): data_local_invariant() has no stand-in here.
+ */
+#define crypt_check_input_len(len, object) noop
+#define check_head_block(conf) noop
+#define check_tail_block(conf) noop
+#define check_full_block(conf) noop
+#define check_cursor_head(conf) noop
+#define check_cursor_full(conf) noop
+
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Return the config of @frame selected by @dtype: the hole config for
+ * HOLE_ATOM, the data config for DATA_ATOM. Any other value is logged
+ * and NULL is returned.
+ */
+static inline struct avec_config *conf_by_type(call_frame_t *frame,
+                                               atom_data_type dtype)
+{
+        if (dtype == HOLE_ATOM)
+                return get_hole_conf(frame);
+        if (dtype == DATA_ATOM)
+                return get_data_conf(frame);
+
+        gf_log("crypt", GF_LOG_DEBUG, "bad atom type");
+        return NULL;
+}
+
+/* Number of calls accounted for the head block of @conf: 1 or 0. */
+static inline uint32_t nr_calls_head(struct avec_config *conf)
+{
+        if (has_head_block(conf))
+                return 1;
+        return 0;
+}
+
+/* Number of calls accounted for the tail block of @conf: 1 or 0. */
+static inline uint32_t nr_calls_tail(struct avec_config *conf)
+{
+        if (has_tail_block(conf))
+                return 1;
+        return 0;
+}
+
+/*
+ * Number of calls accounted for the full blocks of @conf.
+ * For a hole config this is the value of has_full_blocks(); for a
+ * data config it is logical_blocks_occupied(0, nr_full_blocks,
+ * MAX_IOVEC_BITS), or 0 if there are no full blocks. Any other config
+ * type is logged and 0 is returned.
+ */
+static inline uint32_t nr_calls_full(struct avec_config *conf)
+{
+        if (conf->type == HOLE_ATOM)
+                return has_full_blocks(conf);
+
+        if (conf->type == DATA_ATOM) {
+                if (!has_full_blocks(conf))
+                        return 0;
+                return logical_blocks_occupied(0,
+                                               conf->nr_full_blocks,
+                                               MAX_IOVEC_BITS);
+        }
+        gf_log("crypt", GF_LOG_DEBUG, "bad atom data type");
+        return 0;
+}
+
+/* Total number of calls for @conf: head + tail + full. */
+static inline uint32_t nr_calls(struct avec_config *conf)
+{
+        uint32_t total = nr_calls_head(conf);
+
+        total += nr_calls_tail(conf);
+        total += nr_calls_full(conf);
+        return total;
+}
+
+/* Total number of calls for the data config of @frame. */
+static inline uint32_t nr_calls_data(call_frame_t *frame)
+{
+        struct avec_config *conf = get_data_conf(frame);
+
+        return nr_calls(conf);
+}
+
+/* Total number of calls for the hole config of @frame. */
+static inline uint32_t nr_calls_hole(call_frame_t *frame)
+{
+        struct avec_config *conf = get_hole_conf(frame);
+
+        return nr_calls(conf);
+}
+
+/*
+ * Increment the call counter of the local attached to @frame.
+ * No locking is done here.
+ */
+static inline void get_one_call_nolock(call_frame_t *frame)
+{
+        crypt_local_t *local = frame->local;
+
+        local->nr_calls += 1;
+}
+
+/*
+ * Increment the call counter of the local attached to @frame under
+ * call_lock.
+ */
+static inline void get_one_call(call_frame_t *frame)
+{
+        crypt_local_t *local = frame->local;
+        gf_lock_t *lock = &local->call_lock;
+
+        LOCK(lock);
+        get_one_call_nolock(frame);
+        UNLOCK(lock);
+}
+
+/*
+ * Add @nr to the call counter of the local attached to @frame.
+ * No locking is done here.
+ */
+static inline void get_nr_calls_nolock(call_frame_t *frame, int32_t nr)
+{
+        crypt_local_t *local = frame->local;
+
+        local->nr_calls = local->nr_calls + nr;
+}
+
+/*
+ * Add @nr to the call counter of the local attached to @frame under
+ * call_lock.
+ */
+static inline void get_nr_calls(call_frame_t *frame, int32_t nr)
+{
+        crypt_local_t *local = frame->local;
+        gf_lock_t *lock = &local->call_lock;
+
+        LOCK(lock);
+        get_nr_calls_nolock(frame, nr);
+        UNLOCK(lock);
+}
+
+/*
+ * Decrement the call counter of @local under call_lock.
+ * Return 1 if the counter dropped to zero, 0 otherwise.
+ */
+static inline int put_one_call(crypt_local_t *local)
+{
+        int dropped_to_zero;
+
+        LOCK(&local->call_lock);
+        local->nr_calls -= 1;
+        dropped_to_zero = (local->nr_calls == 0) ? 1 : 0;
+        UNLOCK(&local->call_lock);
+
+        return dropped_to_zero;
+}
+
+/*
+ * Return 1 if the end of the original request (orig_offset +
+ * orig_size of the data config) lies beyond the old file size.
+ */
+static inline int is_appended_write(call_frame_t *frame)
+{
+        struct avec_config *conf = get_data_conf(frame);
+        crypt_local_t *local = frame->local;
+        uint64_t end = conf->orig_offset + conf->orig_size;
+
+        return end > local->old_file_size;
+}
+
+/*
+ * Always returns 1, so @frame is currently not looked at.
+ * The disabled code below would return true only for ftruncate and
+ * for appending writes.
+ */
+static inline int is_ordered_mode(call_frame_t *frame)
+{
+#if 0
+ crypt_local_t *local = frame->local;
+ return local->fop == GF_FOP_FTRUNCATE ||
+ (local->fop == GF_FOP_WRITE && is_appended_write(frame));
+#endif
+ return 1;
+}
+
+/*
+ * Return 1 if the cursor of the hole config of @local has reached
+ * the number of components of its aligned vector.
+ */
+static inline int32_t hole_conv_completed(crypt_local_t *local)
+{
+        uint32_t cursor = local->hole_conf.cursor;
+        uint32_t acount = local->hole_conf.acount;
+
+        return cursor == acount;
+}
+
+/* Return 1 if the active setup of @local is the data setup. */
+static inline int32_t data_write_in_progress(crypt_local_t *local)
+{
+        atom_data_type active = local->active_setup;
+
+        return active == DATA_ATOM;
+}
+
+/* Return 1 if the parent frame of @frame belongs to @this. */
+static inline int32_t parent_is_crypt_xlator(call_frame_t *frame,
+                                             xlator_t *this)
+{
+        xlator_t *parent_xl = frame->parent->this;
+
+        return parent_xl == this;
+}
+
+/*
+ * Return the wind handler of the link operation @fop (link, unlink or
+ * rename). Any other fop is logged and NULL is returned.
+ */
+static inline linkop_wind_handler_t linkop_wind_dispatch(glusterfs_fop_t fop)
+{
+        linkop_wind_handler_t handler = NULL;
+
+        switch (fop) {
+        case GF_FOP_LINK:
+                handler = link_wind;
+                break;
+        case GF_FOP_UNLINK:
+                handler = unlink_wind;
+                break;
+        case GF_FOP_RENAME:
+                handler = rename_wind;
+                break;
+        default:
+                gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+                break;
+        }
+        return handler;
+}
+
+/*
+ * Return the unwind handler of the link operation @fop (link, unlink
+ * or rename). Any other fop is logged and NULL is returned.
+ */
+static inline linkop_unwind_handler_t linkop_unwind_dispatch(glusterfs_fop_t fop)
+{
+        linkop_unwind_handler_t handler = NULL;
+
+        switch (fop) {
+        case GF_FOP_LINK:
+                handler = link_unwind;
+                break;
+        case GF_FOP_UNLINK:
+                handler = unlink_unwind;
+                break;
+        case GF_FOP_RENAME:
+                handler = rename_unwind;
+                break;
+        default:
+                gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+                break;
+        }
+        return handler;
+}
+
+/*
+ * Map the link operation @fop to a metadata operation: link appends,
+ * unlink cuts, rename overwrites. Any other fop is logged and
+ * MTD_LAST_OP is returned.
+ */
+static inline mtd_op_t linkop_mtdop_dispatch(glusterfs_fop_t fop)
+{
+        mtd_op_t op = MTD_LAST_OP;
+
+        switch (fop) {
+        case GF_FOP_LINK:
+                op = MTD_APPEND;
+                break;
+        case GF_FOP_UNLINK:
+                op = MTD_CUT;
+                break;
+        case GF_FOP_RENAME:
+                op = MTD_OVERWRITE;
+                break;
+        default:
+                gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", fop);
+                break;
+        }
+        return op;
+}
+
+#endif /* __CRYPT_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c
new file mode 100644
index 000000000..762fa554a
--- /dev/null
+++ b/xlators/encryption/crypt/src/data.c
@@ -0,0 +1,769 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <openssl/crypto.h>
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Set the tweak for a chunk located at file @offset: the first 8
+ * bytes of the ivec of @object receive @offset as a little-endian
+ * 64-bit value. The remaining bytes are not touched here (they are
+ * zeroed by set_private_aes_xts()).
+ */
+static void set_iv_aes_xts(off_t offset, struct object_cipher_info *object)
+{
+        uint64_t tweak;
+
+        /* convert the tweak into a little-endian byte
+         * array (IEEE P1619/D16, May 2007, section 5.1)
+         */
+        tweak = htole64(offset);
+
+        /*
+         * Copy instead of storing through a casted pointer: ivec is
+         * an array of unsigned char, so accessing it as uint64_t
+         * would break the aliasing rules and could be misaligned.
+         */
+        memcpy(object->u.aes_xts.ivec, &tweak, sizeof(tweak));
+
+        /* ivec is padded with zeroes */
+}
+
+/*
+ * Expand @raw_key of @key_size bits into an encryption and a
+ * decryption key schedule, stored at keys[AES_ENCRYPT] and
+ * keys[AES_DECRYPT] respectively.
+ * Return 0 on success, or the non-zero result of the failed
+ * AES_set_*_key() call.
+ */
+static int32_t aes_set_keys_common(unsigned char *raw_key, uint32_t key_size,
+                                   AES_KEY *keys)
+{
+        int32_t rc;
+
+        rc = AES_set_encrypt_key(raw_key, key_size, &keys[AES_ENCRYPT]);
+        if (rc != 0) {
+                gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed");
+                return rc;
+        }
+
+        rc = AES_set_decrypt_key(raw_key, key_size, &keys[AES_DECRYPT]);
+        if (rc != 0)
+                gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed");
+
+        return rc;
+}
+
+/*
+ * set private cipher info for xts mode
+ *
+ * Zero the tweak of the object, retrieve the data keying material of
+ * the file and install the data keys (first half of the material) and
+ * the tweak key (second half) into the object cipher info of @info.
+ *
+ * Return 0 on success, ENOMEM if the temporary key buffer can not be
+ * allocated, or the non-zero result of the failed step.
+ * The temporary buffer with raw keying material is wiped before it is
+ * released.
+ */
+static int32_t set_private_aes_xts(struct crypt_inode_info *info,
+                                   struct master_cipher_info *master)
+{
+        int ret;
+        struct object_cipher_info *object = get_object_cinfo(info);
+        unsigned char *data_key;
+        uint32_t subkey_size;
+
+        /* init tweak value */
+        memset(object->u.aes_xts.ivec, 0, 16);
+
+        data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key);
+        if (!data_key)
+                return ENOMEM;
+
+        /*
+         * retrieve data keying material
+         */
+        ret = get_data_file_key(info, master, object->o_dkey_size, data_key);
+        if (ret) {
+                gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key");
+                goto out;
+        }
+        /*
+         * parse compound xts key
+         */
+        subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */
+        /*
+         * install key for data encryption
+         */
+        ret = aes_set_keys_common(data_key,
+                                  subkey_size << 3, object->u.aes_xts.dkey);
+        if (ret)
+                goto out;
+        /*
+         * set up key used to encrypt tweaks
+         */
+        ret = AES_set_encrypt_key(data_key + subkey_size,
+                                  object->o_dkey_size / 2,
+                                  &object->u.aes_xts.tkey);
+        if (ret < 0)
+                gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed");
+ out:
+        /*
+         * don't leave raw keying material in released memory;
+         * the whole allocation is wiped
+         */
+        OPENSSL_cleanse(data_key, object->o_dkey_size);
+        GF_FREE(data_key);
+        return ret;
+}
+
+/*
+ * Sanity check for the aes-xts mode: AES_BLOCK_SIZE must be equal to
+ * 1 << AES_BLOCK_BITS. Always returns 0.
+ * NOTE(review): cassert() is presumably a compile-time assertion --
+ * confirm against its definition.
+ */
+static int32_t aes_xts_init(void)
+{
+ cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS));
+ return 0;
+}
+
+/*
+ * Check the size (in bits) of a data key for the aes-xts mode.
+ * Return 0 for the supported sizes (256 and 512), -1 otherwise.
+ */
+static int32_t check_key_aes_xts(uint32_t keysize)
+{
+        if (keysize == 256 || keysize == 512)
+                return 0;
+        return -1;
+}
+
+/*
+ * Transform @length bytes from @from to @to in aes-xts mode with the
+ * keys and the current ivec of @object. Non-zero @enc means encrypt,
+ * zero means decrypt. The tweak is always processed with
+ * AES_encrypt. @offset is not used here.
+ * Return the result of CRYPTO_xts128_encrypt().
+ */
+static int32_t encrypt_aes_xts(const unsigned char *from,
+                               unsigned char *to, size_t length,
+                               off_t offset, const int enc,
+                               struct object_cipher_info *object)
+{
+        XTS128_CONTEXT ctx;
+        const int dir = enc ? AES_ENCRYPT : AES_DECRYPT;
+
+        /* data key and block function depend on the direction */
+        ctx.key1 = &object->u.aes_xts.dkey[dir];
+        ctx.block1 = enc ? (block128_f)AES_encrypt : (block128_f)AES_decrypt;
+
+        /* tweak key */
+        ctx.key2 = &object->u.aes_xts.tkey;
+        ctx.block2 = (block128_f)AES_encrypt;
+
+        return CRYPTO_xts128_encrypt(&ctx,
+                                     object->u.aes_xts.ivec,
+                                     from,
+                                     to,
+                                     length, enc);
+}
+
+/*
+ * Cipher input chunk @from of length @len with the algorithm and
+ * mode of @object: set up the IV for @off, then run the transform;
+ * @to:  result of cipher transform;
+ * @off: offset in a file (must be cblock-aligned);
+ * @enc: non-zero means encrypt, zero means decrypt.
+ */
+static void cipher_data(struct object_cipher_info *object,
+                        char *from,
+                        char *to,
+                        off_t off,
+                        size_t len,
+                        const int enc)
+{
+        struct data_cipher_alg *alg;
+
+        crypt_check_input_len(len, object);
+
+#if TRIVIAL_TFM && DEBUG_CRYPT
+        return;
+#endif
+        alg = &data_cipher_algs[object->o_alg][object->o_mode];
+
+        alg->set_iv(off, object);
+        alg->encrypt((const unsigned char *)from,
+                     (unsigned char *)to,
+                     len,
+                     off,
+                     enc,
+                     object);
+}
+
+#define MAX_CIPHER_CHUNK (1 << 30)
+
+/*
+ * Do cipher (encryption/decryption) transform of a
+ * continuous region of memory, at most MAX_CIPHER_CHUNK
+ * bytes per call of cipher_data().
+ *
+ * @from: data to transform;
+ * @to:   where to put the result;
+ * @off:  offset in a file (should be block-aligned
+ *        for atomic cipher modes and ksize-aligned
+ *        for other modes);
+ * @len:  a number of bytes to transform;
+ * @dir:  direction of transform (encrypt/decrypt).
+ */
+static void cipher_region(struct object_cipher_info *object,
+                          char *from,
+                          char *to,
+                          off_t off,
+                          size_t len,
+                          int dir)
+{
+        size_t chunk;
+
+        for (; len > 0; len -= chunk) {
+                chunk = (len > MAX_CIPHER_CHUNK) ? MAX_CIPHER_CHUNK : len;
+
+                /* this will reset IV */
+                cipher_data(object, from, to, off, chunk, dir);
+
+                from += chunk;
+                to += chunk;
+                off += chunk;
+        }
+}
+
+/*
+ * Do cipher transform (encryption/decryption) of
+ * plaintext/ciphertext represented by @vec. Every
+ * component is transformed in place.
+ *
+ * Pre-conditions: @vec represents a continuous piece
+ * of data in a file at offset @off to be ciphered
+ * (encrypted/decrypted).
+ * @count is the number of vec's components. All the
+ * components must be block-aligned, the caller is
+ * responsible for this. @dir is "direction" of
+ * transform (encrypt/decrypt).
+ */
+static void cipher_aligned_iov(struct object_cipher_info *object,
+                               struct iovec *vec,
+                               int count,
+                               off_t off,
+                               int32_t dir)
+{
+        int i;
+        /*
+         * number of bytes already processed; kept as off_t, since
+         * the total size of the vector may not fit into an int
+         */
+        off_t done = 0;
+
+        for (i = 0; i < count; i++) {
+                cipher_region(object,
+                              vec[i].iov_base,
+                              vec[i].iov_base,
+                              off + done,
+                              vec[i].iov_len,
+                              dir);
+                done += vec[i].iov_len;
+        }
+}
+
+/*
+ * Encrypt in place the data represented by @vec (@count components),
+ * located in a file at offset @off.
+ */
+void encrypt_aligned_iov(struct object_cipher_info *object,
+                         struct iovec *vec,
+                         int count,
+                         off_t off)
+{
+        const int32_t dir = 1; /* encrypt */
+
+        cipher_aligned_iov(object, vec, count, off, dir);
+}
+
+/*
+ * Decrypt in place the data represented by @vec (@count components),
+ * located in a file at offset @off.
+ */
+void decrypt_aligned_iov(struct object_cipher_info *object,
+                         struct iovec *vec,
+                         int count,
+                         off_t off)
+{
+        const int32_t dir = 0; /* decrypt */
+
+        cipher_aligned_iov(object, vec, count, off, dir);
+}
+
+#if DEBUG_CRYPT
+/*
+ * Debug helper: concatenate the components of @vec into @buf,
+ * skipping the first @skip bytes of the first component.
+ */
+static void compound_stream(struct iovec *vec, int count, char *buf, off_t skip)
+{
+        int idx;
+        int filled = 0;
+
+        for (idx = 0; idx < count; idx++, skip = 0) {
+                size_t chunk = vec[idx].iov_len - skip;
+
+                memcpy(buf + filled, vec[idx].iov_base + skip, chunk);
+                filled += chunk;
+        }
+}
+
+/*
+ * Debug helper: verify that @avec (without the first @off_in_head
+ * bytes) represents the same data as @vec. Both vectors are flattened
+ * into temporary buffers and compared; any mismatch (and any
+ * allocation failure) is only logged.
+ */
+static void check_iovecs(struct iovec *vec, int cnt,
+                         struct iovec *avec, int acnt, uint32_t off_in_head)
+{
+        char *plain;
+        char *aligned;
+        uint32_t size = iovec_get_size(vec, cnt);
+        uint32_t asize = iovec_get_size(avec, acnt) - off_in_head;
+
+        if (size != asize) {
+                gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d",
+                       size, asize);
+                return;
+        }
+
+        plain = GF_CALLOC(1, size, gf_crypt_mt_data);
+        if (!plain) {
+                gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+                return;
+        }
+        aligned = GF_CALLOC(1, asize, gf_crypt_mt_data);
+        if (!aligned) {
+                GF_FREE(plain);
+                gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+                return;
+        }
+
+        compound_stream(vec, cnt, plain, 0);
+        compound_stream(avec, acnt, aligned, off_in_head);
+
+        if (memcmp(plain, aligned, size))
+                gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
+
+        GF_FREE(plain);
+        GF_FREE(aligned);
+}
+
+#else
+#define check_iovecs(vec, count, avec, avecn, off) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Allocate a block of @block_size bytes from the iobuf pool of @this
+ * and attach it to local->iobref_data (which is created on demand),
+ * so that the block lives as long as that iobref does.
+ *
+ * Return the memory of the block, or NULL on failure (no iobuf,
+ * no iobref, or the iobuf could not be added to the iobref).
+ */
+static char *data_alloc_block(xlator_t *this, crypt_local_t *local,
+                              int32_t block_size)
+{
+        struct iobuf *iobuf = NULL;
+        char *ptr = NULL;
+
+        iobuf = iobuf_get2(this->ctx->iobuf_pool, block_size);
+        if (!iobuf) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "Failed to get iobuf");
+                return NULL;
+        }
+        if (!local->iobref_data) {
+                local->iobref_data = iobref_new();
+                if (!local->iobref_data) {
+                        gf_log("crypt", GF_LOG_ERROR,
+                               "Failed to get iobref");
+                        iobuf_unref(iobuf);
+                        return NULL;
+                }
+        }
+        if (iobref_add(local->iobref_data, iobuf)) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "Failed to add iobuf to iobref");
+                iobuf_unref(iobuf);
+                return NULL;
+        }
+        ptr = iobuf->ptr;
+        /*
+         * iobref_add() has taken its own reference, so drop the one
+         * obtained from iobuf_get2(); otherwise the iobuf is never
+         * returned to the pool.
+         */
+        iobuf_unref(iobuf);
+        return ptr;
+}
+
+/*
+ * Compound @avec, which represent the same data
+ * chunk as @vec, but has aligned components of
+ * specified block size. Alloc blocks, if needed.
+ * In particular, incomplete head and tail blocks
+ * must be allocated.
+ * Add the number of allocated blocks to @blocks_allocated.
+ *
+ * Example:
+ *
+ * input: data chunk represented by 4 components
+ * [AB],[BC],[CD],[DE];
+ * output: 5 logical blocks (0, 1, 2, 3, 4).
+ *
+ * A B C D E
+ * *-----*+------*-+---*----+--------+-*
+ * | || | | | | | |
+ * *-+-----+*------+-*---+----*--------*-+------*
+ * 0 1 2 3 4
+ *
+ * 0 - incomplete compound (head);
+ * 1, 2 - full compound;
+ * 3 - full non-compound (the case of reuse);
+ * 4 - incomplete non-compound (tail).
+ */
+/*
+ * Build the aligned vector @avec for the data represented by @vec.
+ *
+ * Newly allocated blocks are stored in @blocks starting at index
+ * *@blocks_allocated, which is incremented for every stored block.
+ * The first block is prefixed with conf->off_in_head zero bytes.
+ * The caller is responsible for @avec and @blocks being large enough.
+ *
+ * Return 0 on success, -ENOMEM if a block can not be allocated.
+ */
+int32_t align_iov_by_atoms(xlator_t *this,
+                           crypt_local_t *local,
+                           struct object_cipher_info *object,
+                           struct iovec *vec /* input vector */,
+                           int32_t count /* number of vec components */,
+                           struct iovec *avec /* aligned vector */,
+                           char **blocks /* pool of blocks */,
+                           uint32_t *blocks_allocated,
+                           struct avec_config *conf)
+{
+        int vecn = 0; /* number of the current component in vec */
+        int avecn = 0; /* number of the current component in avec */
+        off_t vec_off = 0; /* offset in the current vec component,
+                            * i.e. the number of bytes have already
+                            * been copied */
+        int32_t block_size = get_atom_size(object);
+        size_t to_process; /* number of vec's bytes to copy and(or) re-use */
+        int32_t off_in_head = conf->off_in_head;
+
+        to_process = iovec_get_size(vec, count);
+
+        while (to_process > 0) {
+                if (off_in_head ||
+                    vec[vecn].iov_len - vec_off < block_size) {
+                        /*
+                         * less than block_size:
+                         * the case of incomplete (head or tail),
+                         * or compound block
+                         */
+                        size_t copied = 0;
+                        char *block;
+                        /*
+                         * populate the pool with a new block
+                         */
+                        block = data_alloc_block(this, local, block_size);
+                        blocks[*blocks_allocated] = block;
+                        if (!block)
+                                return -ENOMEM;
+                        memset(block, 0, off_in_head);
+                        /*
+                         * fill the block with vec components
+                         */
+                        do {
+                                size_t to_copy;
+                                /*
+                                 * free space left in the block; bytes
+                                 * copied from the previous components
+                                 * must be taken into account, otherwise
+                                 * a large component following a small
+                                 * one overflows the block
+                                 */
+                                size_t room = block_size - off_in_head - copied;
+
+                                to_copy = vec[vecn].iov_len - vec_off;
+                                if (to_copy > room)
+                                        to_copy = room;
+
+                                memcpy(block + off_in_head + copied,
+                                       vec[vecn].iov_base + vec_off,
+                                       to_copy);
+
+                                copied += to_copy;
+                                to_process -= to_copy;
+
+                                vec_off += to_copy;
+                                if (vec_off == vec[vecn].iov_len) {
+                                        /* finished with this vecn */
+                                        vec_off = 0;
+                                        vecn++;
+                                }
+                        } while (copied < (block_size - off_in_head) && to_process > 0);
+                        /*
+                         * update avec
+                         */
+                        avec[avecn].iov_len = off_in_head + copied;
+                        avec[avecn].iov_base = block;
+
+                        (*blocks_allocated)++;
+                        off_in_head = 0;
+                } else {
+                        /*
+                         * the rest of the current vec component
+                         * is not less than block_size, so reuse
+                         * the memory buffer of the component.
+                         */
+                        size_t to_reuse;
+                        to_reuse = (to_process > block_size ?
+                                    block_size :
+                                    to_process);
+                        avec[avecn].iov_len = to_reuse;
+                        avec[avecn].iov_base = vec[vecn].iov_base + vec_off;
+
+                        vec_off += to_reuse;
+                        if (vec_off == vec[vecn].iov_len) {
+                                /* finished with this vecn */
+                                vec_off = 0;
+                                vecn++;
+                        }
+                        to_process -= to_reuse;
+                }
+                avecn++;
+        }
+        check_iovecs(vec, count, avec, avecn, conf->off_in_head);
+        return 0;
+}
+
+/*
+ * Allocate and set up the aligned vector for data submission.
+ * Pre-condition: @conf is set (conf->acount, conf->off_in_head and
+ * conf->off_in_tail are read here).
+ *
+ * With @vec == NULL (no data) a single block is allocated and exposed
+ * with length conf->off_in_tail. Otherwise @vec is re-aligned by
+ * align_iov_by_atoms().
+ *
+ * Returns 0 on success and stores the vector, the block pool and the
+ * number of allocated blocks in @conf. On failure returns ENOMEM, or
+ * whatever non-zero value align_iov_by_atoms() reported, and leaves
+ * conf->avec / conf->pool untouched.
+ */
+int32_t set_config_avec_data(xlator_t *this,
+                             crypt_local_t *local,
+                             struct avec_config *conf,
+                             struct object_cipher_info *object,
+                             struct iovec *vec,
+                             int32_t vec_count)
+{
+        int32_t ret = ENOMEM;
+        struct iovec *avec;
+        char **pool;
+        uint32_t blocks_in_pool = 0;
+
+        conf->type = DATA_ATOM;
+
+        avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec);
+        if (!avec)
+                return ret;
+        /* element size of the pool is that of one block pointer */
+        pool = GF_CALLOC(conf->acount, sizeof(*pool), gf_crypt_mt_char);
+        if (!pool) {
+                GF_FREE(avec);
+                return ret;
+        }
+        if (!vec) {
+                /*
+                 * degenerated case: no data
+                 */
+                pool[0] = data_alloc_block(this, local, get_atom_size(object));
+                if (!pool[0])
+                        goto free;
+                blocks_in_pool = 1;
+                avec->iov_base = pool[0];
+                avec->iov_len = conf->off_in_tail;
+        }
+        else {
+                ret = align_iov_by_atoms(this, local, object, vec, vec_count,
+                                         avec, pool, &blocks_in_pool, conf);
+                if (ret)
+                        goto free;
+        }
+        conf->avec = avec;
+        conf->pool = pool;
+        conf->blocks_in_pool = blocks_in_pool;
+        return 0;
+ free:
+        /* only the two arrays are released here, not the blocks */
+        GF_FREE(avec);
+        GF_FREE(pool);
+        return ret;
+}
+
+/*
+ * Allocate and set up the aligned vector for hole submission.
+ *
+ * The hole needs at most three distinct buffers (head, one re-used
+ * full block, tail), hence the block count is conf->acount minus all
+ * full blocks but one.
+ *
+ * Returns 0 on success (also when nothing has to be submitted, in
+ * which case @conf->avec is left untouched) and ENOMEM on allocation
+ * failure.
+ */
+int32_t set_config_avec_hole(xlator_t *this,
+                             crypt_local_t *local,
+                             struct avec_config *conf,
+                             struct object_cipher_info *object,
+                             glusterfs_fop_t fop)
+{
+        uint32_t i, idx;
+        struct iovec *avec;
+        char **pool;
+        uint32_t num_blocks;
+        uint32_t blocks_in_pool = 0;
+
+        conf->type = HOLE_ATOM;
+
+        num_blocks = conf->acount -
+                (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0);
+
+        switch (fop) {
+        case GF_FOP_WRITE:
+                /*
+                 * hole goes before data
+                 */
+                if (num_blocks == 1 && conf->off_in_tail != 0)
+                        /*
+                         * we won't submit a hole which fits into
+                         * a data atom: this part of hole will be
+                         * submitted with data write
+                         */
+                        return 0;
+                break;
+        case GF_FOP_FTRUNCATE:
+                /*
+                 * expanding truncate, hole goes after data,
+                 * and will be submitted in any case.
+                 */
+                break;
+        default:
+                gf_log("crypt", GF_LOG_WARNING,
+                       "bad file operation %d", fop);
+                return 0;
+        }
+        avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec);
+        if (!avec)
+                return ENOMEM;
+        /* element size of the pool is that of one block pointer */
+        pool = GF_CALLOC(num_blocks, sizeof(*pool), gf_crypt_mt_char);
+        if (!pool) {
+                GF_FREE(avec);
+                return ENOMEM;
+        }
+        for (i = 0; i < num_blocks; i++) {
+                pool[i] = data_alloc_block(this, local, get_atom_size(object));
+                if (pool[i] == NULL)
+                        goto free;
+                blocks_in_pool++;
+        }
+        if (has_head_block(conf)) {
+                /* set head block: zero everything after the old data */
+                idx = 0;
+                avec[idx].iov_base = pool[idx];
+                avec[idx].iov_len = get_atom_size(object);
+                memset(avec[idx].iov_base + conf->off_in_head,
+                       0,
+                       get_atom_size(object) - conf->off_in_head);
+        }
+        if (has_tail_block(conf)) {
+                /* set tail block: zero the part that belongs to the hole */
+                idx = num_blocks - 1;
+                avec[idx].iov_base = pool[idx];
+                avec[idx].iov_len = get_atom_size(object);
+                memset(avec[idx].iov_base, 0, conf->off_in_tail);
+        }
+        if (has_full_blocks(conf)) {
+                /* set full block: it follows the head block, if any */
+                idx = conf->off_in_head ? 1 : 0;
+                avec[idx].iov_base = pool[idx];
+                avec[idx].iov_len = get_atom_size(object);
+                /*
+                 * since we re-use the buffer,
+                 * zeroes will be set every time
+                 * before encryption, see submit_full()
+                 */
+        }
+        conf->avec = avec;
+        conf->pool = pool;
+        conf->blocks_in_pool = blocks_in_pool;
+        return 0;
+ free:
+        /* only the two arrays are released here, not the blocks */
+        GF_FREE(avec);
+        GF_FREE(pool);
+        return ENOMEM;
+}
+
+/* A helper for setting up config of partial atoms (which
+ * participate in read-modify-write sequence).
+ *
+ * Calculate and setup precise amount of "extra-bytes"
+ * that should be uptodated at the end of partial (not
+ * necessarily tail!) block.
+ *
+ * Pre-condition: local->old_file_size is valid!
+ * @conf contains setup, which is enough for correct calculation
+ * of has_tail_block(), ->get_offset().
+ */
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+                    struct avec_config *conf, atom_data_type dtype)
+{
+        uint32_t to_block; /* old bytes that lie inside the partial block */
+        uint64_t past_start;
+        crypt_local_t *local = frame->local;
+        uint64_t old_file_size = local->old_file_size;
+        struct rmw_atom *partial = atom_by_types(dtype,
+                                                 has_tail_block(conf) ?
+                                                 TAIL_ATOM : HEAD_ATOM);
+
+        if (old_file_size <= partial->offset_at(frame, object))
+                /* the old file ended before this block */
+                to_block = 0;
+        else {
+                /*
+                 * Clamp in 64 bits first: narrowing the difference to
+                 * 32 bits before the comparison would drop the high
+                 * bits and could yield a value below the atom size for
+                 * a file that actually covers the whole block.
+                 */
+                past_start = old_file_size - partial->offset_at(frame, object);
+                if (past_start > get_atom_size(object))
+                        to_block = get_atom_size(object);
+                else
+                        to_block = past_start;
+        }
+        if (to_block > conf->off_in_tail)
+                conf->gap_in_tail = to_block - conf->off_in_tail;
+        else
+                /*
+                 * nothing to uptodate
+                 */
+                conf->gap_in_tail = 0;
+}
+
+/*
+ * Fill struct avec_config with the offset layout of the byte range
+ * [@offset, @offset + @count): the range is rounded down at the start
+ * and up at the end to atom boundaries, and the number of occupied and
+ * of full atoms is computed.
+ *
+ * @dtype selects which config of @frame is filled (data or hole).
+ * If @set_gap is non-zero and the range ends inside an atom, the
+ * amount of old bytes to bring up to date at the end is computed too.
+ */
+void set_config_offsets(call_frame_t *frame,
+                        xlator_t *this,
+                        uint64_t offset,
+                        uint64_t count,
+                        atom_data_type dtype,
+                        int32_t set_gap)
+{
+        crypt_local_t *local;
+        struct object_cipher_info *object;
+        struct avec_config *conf;
+        uint32_t resid;
+
+        uint32_t atom_size;
+        uint32_t atom_bits;
+
+        size_t orig_size;
+        off_t orig_offset;
+        size_t expanded_size;
+        off_t aligned_offset;
+
+        uint32_t off_in_head = 0;
+        uint32_t off_in_tail = 0;
+        uint32_t nr_full_blocks;
+        /*
+         * 64-bit and signed: expanded_size may exceed 2 GiB, and the
+         * head/tail subtraction below is guarded by a "> 0" test.
+         */
+        int64_t size_full_blocks;
+
+        uint32_t acount; /* number of aligned components to write.
+                          * The same as number of occupied logical
+                          * blocks (atoms)
+                          */
+        local = frame->local;
+        object = &local->info->cinfo;
+        conf = (dtype == DATA_ATOM ?
+                get_data_conf(frame) : get_hole_conf(frame));
+
+        orig_offset = offset;
+        orig_size = count;
+
+        atom_size = get_atom_size(object);
+        atom_bits = get_atom_bits(object);
+
+        /*
+         * Round-down the start,
+         * round-up the end.
+         */
+        resid = offset & (uint64_t)(atom_size - 1);
+
+        if (resid)
+                off_in_head = resid;
+        aligned_offset = offset - off_in_head;
+        expanded_size = orig_size + off_in_head;
+
+        /* calculate tail,
+           expand size forward */
+        resid = (offset + orig_size) & (uint64_t)(atom_size - 1);
+
+        if (resid) {
+                off_in_tail = resid;
+                expanded_size += (atom_size - off_in_tail);
+        }
+        /*
+         * calculate number of occupied blocks
+         */
+        acount = expanded_size >> atom_bits;
+        /*
+         * calculate number of full blocks: drop the partial head
+         * and the partial tail (they may be one and the same block)
+         */
+        size_full_blocks = expanded_size;
+        if (off_in_head)
+                size_full_blocks -= atom_size;
+        if (off_in_tail && size_full_blocks > 0)
+                size_full_blocks -= atom_size;
+        nr_full_blocks = size_full_blocks >> atom_bits;
+
+        conf->atom_size = atom_size;
+        conf->orig_size = orig_size;
+        conf->orig_offset = orig_offset;
+        conf->expanded_size = expanded_size;
+        conf->aligned_offset = aligned_offset;
+
+        conf->off_in_head = off_in_head;
+        conf->off_in_tail = off_in_tail;
+        conf->nr_full_blocks = nr_full_blocks;
+        conf->acount = acount;
+        /*
+         * Finally, calculate precise amount of
+         * "extra-bytes" that should be uptodated
+         * at the end.
+         * Only if RMW is expected.
+         */
+        if (off_in_tail && set_gap)
+                set_gap_at_end(frame, object, conf, dtype);
+}
+
+/*
+ * Table of data cipher descriptors, indexed by [algorithm][mode].
+ * Only the AES/XTS slot is initialized explicitly here.
+ */
+struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = {
+ [AES_CIPHER_ALG][XTS_CIPHER_MODE] =
+ { .atomic = _gf_true,
+ .should_pad = _gf_true,
+ .blkbits = AES_BLOCK_BITS,
+ .init = aes_xts_init,
+ .set_private = set_private_aes_xts,
+ .check_key = check_key_aes_xts,
+ .set_iv = set_iv_aes_xts,
+ .encrypt = encrypt_aes_xts
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c
new file mode 100644
index 000000000..4a1d3bb5a
--- /dev/null
+++ b/xlators/encryption/crypt/src/keys.c
@@ -0,0 +1,302 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/* Key hierarchy
+
+ +----------------+
+ | MASTER_VOL_KEY |
+ +-------+--------+
+ |
+ |
+ +----------------+----------------+
+ | | |
+ | | |
+ +-------+------+ +-------+-------+ +------+--------+
+ | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY |
+ +-------+------+ +---------------+ +---------------+
+ |
+ |
+ +-------+-------+
+ | NMTD_LINK_KEY |
+ +---------------+
+
+ */
+
+/*
+ * Debug-only sanity check: log when the number of PRF iterations is
+ * zero. Without DEBUG_CRYPT the check expands to a no-op.
+ */
+#if DEBUG_CRYPT
+static void check_prf_iters(uint32_t num_iters)
+{
+ if (num_iters == 0)
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad number of prf iterations : %d", num_iters);
+}
+#else
+#define check_prf_iters(num_iters) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * All-zero 16-byte object id; passed as the id-context when the
+ * per-volume key is derived (see get_nmtd_vol_key()).
+ */
+unsigned char crypt_fake_oid[16] =
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+/*
+ * derive key in the counter mode using
+ * sha256-based HMAC as PRF, see
+ * NIST Special Publication 800-108, 5.1)
+ */
+
+#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH
+
+/*
+ * Prepare @ctx for deriving a child key of @type from the parent key
+ * @pkey: allocate the output buffer and build the fixed input data of
+ * the KDF. @pkey itself is only referenced, not copied.
+ *
+ * Returns 0 on success, ENOMEM on allocation failure (nothing stays
+ * allocated in that case).
+ */
+static int32_t kderive_init(struct kderive_context *ctx,
+                            const unsigned char *pkey, /* parent key */
+                            uint32_t pkey_size, /* parent key size */
+                            const unsigned char *idctx, /* id-context */
+                            uint32_t idctx_size,
+                            crypt_key_type type /* type of child key */)
+{
+        unsigned char *pos;
+        uint32_t llen = strlen(crypt_keys[type].label);
+        uint32_t be_len;
+        /*
+         * Compound the fixed input data for KDF:
+         * [i]_2 || Label || 0x00 || Id-Context || [L]_2),
+         * NIST SP 800-108, 5.1
+         */
+        ctx->fid_len =
+                sizeof(uint32_t) +
+                llen +
+                1 +
+                idctx_size +
+                sizeof(uint32_t);
+
+        ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key);
+        if (!ctx->fid)
+                return ENOMEM;
+        /* the output holds a whole number of PRF results */
+        ctx->out_len = round_up(crypt_keys[type].len >> 3,
+                                PRF_OUTPUT_SIZE);
+        ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key);
+        if (!ctx->out) {
+                GF_FREE(ctx->fid);
+                return ENOMEM;
+        }
+        ctx->pkey = pkey;
+        ctx->pkey_len = pkey_size;
+        ctx->ckey_len = crypt_keys[type].len;
+
+        pos = ctx->fid;
+
+        /* counter will be set up in kderive_rfn() */
+        pos += sizeof(uint32_t);
+
+        memcpy(pos, crypt_keys[type].label, llen);
+        pos += llen;
+
+        /* set up zero octet */
+        *pos = 0;
+        pos += 1;
+
+        memcpy(pos, idctx, idctx_size);
+        pos += idctx_size;
+
+        /*
+         * The length field sits at an arbitrary byte offset (it follows
+         * the label and the id-context), so store it bytewise rather
+         * than through a possibly misaligned uint32_t pointer.
+         */
+        be_len = htobe32(ctx->ckey_len);
+        memcpy(pos, &be_len, sizeof(be_len));
+
+        return 0;
+}
+
+/*
+ * Run the PRF and fill ctx->out.
+ *
+ * ctx->out is produced in chunks of PRF_OUTPUT_SIZE bytes; chunk i is
+ * HMAC-SHA256 over ctx->fid with the big-endian value i stored in the
+ * first 4 bytes of ctx->fid (the counter starts at 0).
+ * Return values of the HMAC_* calls are not checked.
+ */
+static void kderive_update(struct kderive_context *ctx)
+{
+ uint32_t i;
+ HMAC_CTX hctx;
+ unsigned char *pos = ctx->out;
+ uint32_t *p_iter = (uint32_t *)ctx->fid;
+ uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE;
+
+ check_prf_iters(num_iters);
+
+ HMAC_CTX_init(&hctx);
+ for (i = 0; i < num_iters; i++) {
+ /*
+ * update the iteration number in the fid
+ */
+ *p_iter = htobe32(i);
+ /* NOTE(review): pkey_len is presumably in bits, hence ">> 3" - confirm */
+ HMAC_Init_ex(&hctx,
+ ctx->pkey, ctx->pkey_len >> 3,
+ EVP_sha256(),
+ NULL);
+ HMAC_Update(&hctx, ctx->fid, ctx->fid_len);
+ HMAC_Final(&hctx, pos, NULL);
+
+ pos += PRF_OUTPUT_SIZE;
+ }
+ HMAC_CTX_cleanup(&hctx);
+}
+
+/*
+ * Hand out the derived key and tear down @ctx: copy the first
+ * ckey_len / 8 bytes of the PRF output to @child, free both buffers
+ * of @ctx and zero the context structure.
+ * Note that the freed buffers themselves are not wiped first.
+ */
+static void kderive_final(struct kderive_context *ctx, unsigned char *child)
+{
+ memcpy(child, ctx->out, ctx->ckey_len >> 3);
+ GF_FREE(ctx->fid);
+ GF_FREE(ctx->out);
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+/*
+ * Derive the per-volume key used for authentication of object ids.
+ * The key is derived from the master key with the all-zero object id
+ * as context and is stored in @master->m_nmtd_key.
+ *
+ * Returns 0 on success, otherwise the error of kderive_init().
+ */
+int32_t get_nmtd_vol_key(struct master_cipher_info *master)
+{
+        struct kderive_context kctx;
+        int32_t err;
+
+        err = kderive_init(&kctx, master->m_key, master_key_size(),
+                           crypt_fake_oid, sizeof(uuid_t), NMTD_VOL_KEY);
+        if (err == 0) {
+                kderive_update(&kctx);
+                kderive_final(&kctx, master->m_nmtd_key);
+        }
+        return err;
+}
+
+/*
+ * Derive the per-link key for authentication of the non-encrypted
+ * meta-data (nmtd). The key is derived from the per-volume nmtd key
+ * with the pathname @loc->path as context and is written to @result.
+ *
+ * Returns 0 on success, otherwise the error of kderive_init().
+ */
+int32_t get_nmtd_link_key(loc_t *loc,
+                          struct master_cipher_info *master,
+                          unsigned char *result)
+{
+        struct kderive_context kctx;
+        const unsigned char *path = (const unsigned char *)loc->path;
+        int32_t err;
+
+        err = kderive_init(&kctx, master->m_nmtd_key, nmtd_vol_key_size(),
+                           path, strlen(loc->path), NMTD_LINK_KEY);
+        if (err == 0) {
+                kderive_update(&kctx);
+                kderive_final(&kctx, result);
+        }
+        return err;
+}
+
+/*
+ * Derive the per-file key for encryption and authentication of the
+ * encrypted part of metadata (emtd). The key is derived from the
+ * master key with the object id @info->oid as context and is written
+ * to @result.
+ *
+ * Returns 0 on success, otherwise the error of kderive_init().
+ */
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+                          struct master_cipher_info *master,
+                          unsigned char *result)
+{
+        struct kderive_context kctx;
+        int32_t err;
+
+        err = kderive_init(&kctx, master->m_key, master_key_size(),
+                           info->oid, sizeof(uuid_t), EMTD_FILE_KEY);
+        if (err == 0) {
+                kderive_update(&kctx);
+                kderive_final(&kctx, result);
+        }
+        return err;
+}
+
+/*
+ * Map a data key size in bits to the matching key type.
+ * Stores the type in @type and returns 0 for 256 and 512; any other
+ * size is logged and ENOTSUP is returned with @type left untouched.
+ */
+static int32_t data_key_type_by_size(uint32_t keysize, crypt_key_type *type)
+{
+        if (keysize == 256) {
+                *type = DATA_FILE_KEY_256;
+                return 0;
+        }
+        if (keysize == 512) {
+                *type = DATA_FILE_KEY_512;
+                return 0;
+        }
+        gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d",
+               keysize);
+        return ENOTSUP;
+}
+
+/*
+ * Derive the per-file key for data encryption. The key of @keysize
+ * bits is derived from the master key with the object id @info->oid
+ * as context and is written to @key.
+ *
+ * Returns 0 on success, ENOTSUP for an unsupported @keysize, otherwise
+ * the error of kderive_init().
+ */
+int32_t get_data_file_key(struct crypt_inode_info *info,
+                          struct master_cipher_info *master,
+                          uint32_t keysize,
+                          unsigned char *key)
+{
+        struct kderive_context kctx;
+        crypt_key_type ktype;
+        int32_t err;
+
+        err = data_key_type_by_size(keysize, &ktype);
+        if (err != 0)
+                return err;
+
+        err = kderive_init(&kctx, master->m_key, master_key_size(),
+                           info->oid, sizeof(uuid_t), ktype);
+        if (err != 0)
+                return err;
+
+        kderive_update(&kctx);
+        kderive_final(&kctx, key);
+        return 0;
+}
+
+/*
+ * Descriptors of all key types: key length and the label that is
+ * mixed into the key derivation (see kderive_init()).
+ * .len is in bits: byte sizes are shifted left by 3 here, and the
+ * derivation code shifts .len right by 3 to get a byte count.
+ *
+ * NOTE: Don't change existing keys: it will break compatibility;
+ */
+struct crypt_key crypt_keys[LAST_KEY_TYPE] = {
+ [MASTER_VOL_KEY] =
+ { .len = MASTER_VOL_KEY_SIZE << 3,
+ .label = "volume-master",
+ },
+ [NMTD_VOL_KEY] =
+ { .len = NMTD_VOL_KEY_SIZE << 3,
+ .label = "volume-nmtd-key-generation"
+ },
+ [NMTD_LINK_KEY] =
+ { .len = 128,
+ .label = "link-nmtd-authentication"
+ },
+ [EMTD_FILE_KEY] =
+ { .len = 128,
+ .label = "file-emtd-encryption-and-auth"
+ },
+ [DATA_FILE_KEY_256] =
+ { .len = 256,
+ .label = "file-data-encryption-256"
+ },
+ [DATA_FILE_KEY_512] =
+ { .len = 512,
+ .label = "file-data-encryption-512"
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c
new file mode 100644
index 000000000..36b14c055
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.c
@@ -0,0 +1,605 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+#include "metadata.h"
+
+/*
+ * Allocate a zeroed format string of @size bytes in @local and record
+ * its size. With @size == 0 nothing is allocated and only the size is
+ * recorded.
+ *
+ * Returns 0 on success, ENOMEM if the allocation failed.
+ */
+int32_t alloc_format(crypt_local_t *local, size_t size)
+{
+        if (size == 0) {
+                local->format_size = 0;
+                return 0;
+        }
+        local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd);
+        if (local->format == NULL)
+                return ENOMEM;
+
+        local->format_size = size;
+        return 0;
+}
+
+/*
+ * Allocate in @local a format string of the size needed for a newly
+ * created file. Returns the result of alloc_format().
+ */
+int32_t alloc_format_create(crypt_local_t *local)
+{
+ return alloc_format(local, new_format_size());
+}
+
+/*
+ * Release the format string of @local.
+ * The pointer and the recorded size in @local are left as they are.
+ */
+void free_format(crypt_local_t *local)
+{
+ GF_FREE(local->format);
+}
+
+/*
+ * Check that the metadata extracted to @info is supported by this
+ * translator: minor version, cipher algorithm, cipher mode and block
+ * size.
+ *
+ * Returns 0 if everything is supported, EINVAL otherwise (the
+ * offending value is logged).
+ */
+static int32_t check_file_metadata(struct crypt_inode_info *info)
+{
+        struct object_cipher_info *object = &info->cinfo;
+
+        if (info->nr_minor != CRYPT_XLATOR_ID) {
+                gf_log("crypt", GF_LOG_WARNING,
+                       "unsupported minor subversion %d", info->nr_minor);
+                return EINVAL;
+        }
+        /*
+         * LAST_CIPHER_ALG and LAST_CIPHER_MODE are the dimensions of
+         * data_cipher_algs[][], so a value equal to them is already
+         * out of range.
+         */
+        if (object->o_alg >= LAST_CIPHER_ALG) {
+                gf_log("crypt", GF_LOG_WARNING,
+                       "unsupported cipher algorithm %d",
+                       object->o_alg);
+                return EINVAL;
+        }
+        if (object->o_mode >= LAST_CIPHER_MODE) {
+                gf_log("crypt", GF_LOG_WARNING,
+                       "unsupported cipher mode %d",
+                       object->o_mode);
+                return EINVAL;
+        }
+        if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS ||
+            object->o_block_bits > CRYPT_MAX_BLOCK_BITS) {
+                gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d",
+                       object->o_block_bits);
+                return EINVAL;
+        }
+        /* TBD: check data key size */
+        return 0;
+}
+
+/*
+ * Size of the v1 part of the format string after operation @op is
+ * applied to a v1 part of @old_size bytes. Zero means "no update":
+ * it is returned for an unknown operation and for a cut that would
+ * remove the last per-link MAC.
+ */
+static size_t format_size_v1(mtd_op_t op, size_t old_size)
+{
+        size_t new_size = 0;
+
+        switch (op) {
+        case MTD_CREATE:
+                new_size = sizeof(struct mtd_format_v1);
+                break;
+        case MTD_OVERWRITE:
+                new_size = old_size;
+                break;
+        case MTD_APPEND:
+                new_size = old_size + NMTD_8_MAC_SIZE;
+                break;
+        case MTD_CUT:
+                /* the base format already includes one MAC */
+                if (old_size > sizeof(struct mtd_format_v1))
+                        new_size = old_size - NMTD_8_MAC_SIZE;
+                break;
+        default:
+                gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation");
+                break;
+        }
+        return new_size;
+}
+
+/*
+ * Calculate size of the updated format string.
+ * Returned zero means that we don't need to update the format string.
+ *
+ * @old_size includes the common struct crypt_format header; it is
+ * stripped before the version-specific loader is asked and added back
+ * to a non-zero answer.
+ */
+size_t format_size(mtd_op_t op, size_t old_size)
+{
+ size_t versioned;
+
+ /*
+ * For old_size smaller than the header (new_format_size() passes 0)
+ * the subtraction wraps around; the v1 loader ignores the argument
+ * for MTD_CREATE.
+ */
+ versioned = mtd_loaders[current_mtd_loader()].format_size(op,
+ old_size - sizeof(struct crypt_format));
+ if (versioned != 0)
+ return versioned + sizeof(struct crypt_format);
+ return 0;
+}
+
+/*
+ * size of the format string of newly created file (nr_links = 1),
+ * i.e. the result of format_size() for MTD_CREATE
+ */
+size_t new_format_size(void)
+{
+ return format_size(MTD_CREATE, 0);
+}
+
+/*
+ * Calculate the per-link MAC for the pathname of @loc.
+ *
+ * The MAC is an AES-128 CMAC over the non-encrypted metadata of @info,
+ * keyed with the per-link key derived from @loc->path. The full MAC is
+ * written to @result.
+ *
+ * Returns 0 on success, -1 on any failure (the failure is logged).
+ */
+static int32_t calc_link_mac_v1(struct mtd_format_v1 *fmt,
+                                loc_t *loc,
+                                unsigned char *result,
+                                struct crypt_inode_info *info,
+                                struct master_cipher_info *master)
+{
+        int32_t rc = -1;
+        unsigned char link_key[16];
+        CMAC_CTX *cmac;
+        size_t mac_len;
+
+        if (get_nmtd_link_key(loc, master, link_key)) {
+                gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key");
+                return -1;
+        }
+        cmac = CMAC_CTX_new();
+        if (cmac == NULL) {
+                gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed");
+                return -1;
+        }
+        /* the CMAC_* calls report failure by returning 0 */
+        if (!CMAC_Init(cmac, link_key, sizeof(link_key),
+                       EVP_aes_128_cbc(), 0)) {
+                gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed");
+                goto out;
+        }
+        if (!CMAC_Update(cmac, get_NMTD_V1(info), SIZE_OF_NMTD_V1)) {
+                gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed");
+                goto out;
+        }
+        if (!CMAC_Final(cmac, result, &mac_len)) {
+                gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed");
+                goto out;
+        }
+        rc = 0;
+ out:
+        CMAC_CTX_free(cmac);
+        return rc;
+}
+
+/*
+ * Create the per-link MAC of index @idx for the pathname of @loc:
+ * calculate the MAC and store its first SIZE_OF_NMTD_V1_MAC bytes in
+ * slot @idx of the MAC array of @fmt.
+ *
+ * Returns 0 on success, -1 if the MAC could not be calculated (the
+ * slot is left untouched then).
+ */
+static int32_t create_link_mac_v1(struct mtd_format_v1 *fmt,
+                                  uint32_t idx,
+                                  loc_t *loc,
+                                  struct crypt_inode_info *info,
+                                  struct master_cipher_info *master)
+{
+        unsigned char full_mac[16];
+        unsigned char *slot = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+        if (calc_link_mac_v1(fmt, loc, full_mac, info, master) != 0)
+                return -1;
+
+        memcpy(slot, full_mac, SIZE_OF_NMTD_V1_MAC);
+        return 0;
+}
+
+/*
+ * Build the v1 part of the format string in @wire for a new file:
+ * fill the cipher parameters from @master, encrypt them in place with
+ * AES-GCM keyed by the per-file metadata key, store the GCM tag and
+ * finally store the first per-link MAC for @loc.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int32_t create_format_v1(unsigned char *wire,
+                                loc_t *loc,
+                                struct crypt_inode_info *info,
+                                struct master_cipher_info *master)
+{
+        int32_t ret;
+        struct mtd_format_v1 *fmt;
+        unsigned char mtd_key[16];
+        AES_KEY EMTD_KEY;
+        unsigned char nmtd_link_key[16];
+        uint32_t ad;
+        GCM128_CONTEXT *gctx;
+
+        fmt = (struct mtd_format_v1 *)wire;
+
+        fmt->minor_id = info->nr_minor;
+        fmt->alg_id = AES_CIPHER_ALG;
+        fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS;
+        fmt->block_bits = master->m_block_bits;
+        fmt->mode_id = master->m_mode;
+        /*
+         * retrieve keys for the parts of metadata
+         */
+        ret = get_emtd_file_key(info, master, mtd_key);
+        if (ret)
+                return ret;
+        /*
+         * The link key itself is not used below (the MAC helper
+         * derives it again); the call is kept so that a derivation
+         * failure is reported before anything is encrypted.
+         */
+        ret = get_nmtd_link_key(loc, master, nmtd_link_key);
+        if (ret)
+                return ret;
+
+        ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+        if (ret < 0) {
+                gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
+                return ret;
+        }
+        gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+        if (!gctx) {
+                gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
+                return ENOMEM;
+        }
+        CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+        /* the loader version is authenticated as additional data */
+        ad = htole32(MTD_LOADER_V1);
+        ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+        if (ret) {
+                gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+                CRYPTO_gcm128_release(gctx);
+                return ret;
+        }
+        ret = CRYPTO_gcm128_encrypt(gctx,
+                                    get_EMTD_V1(fmt),
+                                    get_EMTD_V1(fmt),
+                                    SIZE_OF_EMTD_V1);
+        if (ret) {
+                gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed");
+                CRYPTO_gcm128_release(gctx);
+                return ret;
+        }
+        /*
+         * set MAC of encrypted part of metadata
+         */
+        CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC);
+        CRYPTO_gcm128_release(gctx);
+        /*
+         * set the first MAC of non-encrypted part of metadata
+         */
+        return create_link_mac_v1(fmt, 0, loc, info, master);
+}
+
+/*
+ * Called by fops:
+ * ->create();
+ * ->link();
+ *
+ * Pack common and version-specific parts of file's metadata
+ * Pre-conditions: @info contains valid object-id.
+ *
+ * The common header at the start of @wire gets the id of the current
+ * loader; the rest of @wire is filled by that loader's create_format
+ * method, whose result is returned.
+ */
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ struct crypt_format *fmt = (struct crypt_format *)wire;
+
+ fmt->loader_id = current_mtd_loader();
+
+ /* skip the common header */
+ wire += sizeof(struct crypt_format);
+ return mtd_loaders[current_mtd_loader()].create_format(wire, loc,
+ info, master);
+}
+
+/*
+ * Append or overwrite per-link mac of @mac_idx index
+ * in accordance with the new pathname
+ *
+ * The old format string (@old_size bytes) is copied to @new, then the
+ * MAC for @loc is written to slot @mac_idx of @new.
+ * NOTE(review): for an append @new presumably has room for one more
+ * MAC than @old - confirm against the caller.
+ * @local is not used. Returns the result of create_link_mac_v1().
+ */
+int32_t appov_link_mac_v1(unsigned char *new,
+ unsigned char *old,
+ uint32_t old_size,
+ int32_t mac_idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ memcpy(new, old, old_size);
+ return create_link_mac_v1((struct mtd_format_v1 *)new, mac_idx,
+ loc, info, master);
+}
+
+/*
+ * Cut per-link mac of @mac_idx index
+ *
+ * Copies @old to @new leaving out the 8-byte MAC slot @mac_idx.
+ * struct mtd_format_v1 already contains slot 0, so slot i starts at
+ * sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (i - 1).
+ * @loc, @info, @master and @local are not used. Always returns 0.
+ */
+static int32_t cut_link_mac_v1(unsigned char *new,
+ unsigned char *old,
+ uint32_t old_size,
+ int32_t mac_idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ /* everything that precedes the slot being cut */
+ memcpy(new,
+ old,
+ sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1));
+
+ /* everything that follows the slot, moved down by one slot */
+ memcpy(new + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1),
+ old + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx,
+ old_size - (sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx));
+ return 0;
+}
+
+/*
+ * Build in @new the updated v1 part of the format string from @old
+ * (@old_len bytes) according to @op:
+ * MTD_APPEND    - add a MAC for @loc after the existing ones;
+ * MTD_OVERWRITE - replace the MAC of index @mac_idx by one for @loc;
+ * MTD_CUT       - remove the MAC of index @mac_idx.
+ *
+ * Returns the result of the helper, or -1 for an unknown operation.
+ */
+int32_t update_format_v1(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx, /* of old name */
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ switch (op) {
+ case MTD_APPEND:
+ /* index of the first free slot; 8 is the size of one MAC */
+ mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1))/8;
+ /* fallthrough: an append is an overwrite of the new slot */
+ case MTD_OVERWRITE:
+ return appov_link_mac_v1(new, old, old_len, mac_idx,
+ loc, info, master, local);
+ case MTD_CUT:
+ return cut_link_mac_v1(new, old, old_len, mac_idx,
+ loc, info, master, local);
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op);
+ return -1;
+ }
+}
+
+/*
+ * Called by fops:
+ *
+ * ->link()
+ * ->unlink()
+ * ->rename()
+ *
+ * Build the updated format string in @new from @old (@old_len bytes):
+ * the common header is copied as is, the rest is handled by the
+ * update_format method of the current loader, whose result is
+ * returned. With @new == NULL nothing is done and 0 is returned.
+ */
+int32_t update_format(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ if (!new)
+ return 0;
+ memcpy(new, old, sizeof(struct crypt_format));
+
+ /* step over the common header in both strings */
+ old += sizeof(struct crypt_format);
+ new += sizeof(struct crypt_format);
+ old_len -= sizeof(struct crypt_format);
+
+ return mtd_loaders[current_mtd_loader()].update_format(new, old,
+ old_len,
+ mac_idx, op,
+ loc, info,
+ master, local);
+}
+
+/*
+ * Perform preliminary checks of found metadata: @len must cover
+ * struct mtd_format_v1 plus a whole number of additional per-link
+ * MACs, and the number of MACs must not exceed _POSIX_LINK_MAX.
+ *
+ * Return -EIO on errors;
+ * Return number of object-id MACs (>= 1) on success
+ */
+int32_t check_format_v1(uint32_t len, unsigned char *wire)
+{
+        uint32_t nr_links;
+
+        if (len < sizeof(struct mtd_format_v1)) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "v1-loader: bad metadata size %u", len);
+                goto error;
+        }
+        len -= sizeof(struct mtd_format_v1);
+        if (len % sizeof(nmtd_8_mac_t)) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "v1-loader: bad metadata format");
+                goto error;
+        }
+        /* the base format already includes one MAC */
+        nr_links = 1 + len / sizeof(nmtd_8_mac_t);
+        if (nr_links > _POSIX_LINK_MAX)
+                goto error;
+        return nr_links;
+ error:
+        /*
+         * The error must be negative: a positive errno value would be
+         * indistinguishable from a valid number of MACs.
+         */
+        return -EIO;
+}
+
+/*
+ * Verify the per-link MAC of index @idx against the pathname of @loc:
+ * the MAC is recalculated and its first SIZE_OF_NMTD_V1_MAC bytes are
+ * compared with the stored ones.
+ *
+ * return:
+ * -1 on errors;
+ * 0 on failed verification;
+ * 1 on successful verification
+ */
+static int32_t verify_link_mac_v1(struct mtd_format_v1 *fmt,
+                                  uint32_t idx /* index of the mac to verify */,
+                                  loc_t *loc,
+                                  struct crypt_inode_info *info,
+                                  struct master_cipher_info *master)
+{
+        unsigned char expected[16];
+        unsigned char *stored = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+        if (calc_link_mac_v1(fmt, loc, expected, info, master) != 0)
+                return -1;
+
+        return memcmp(expected, stored, SIZE_OF_NMTD_V1_MAC) == 0 ? 1 : 0;
+}
+
+/*
+ * Look up the per-link MAC that matches the pathname of @loc among
+ * the first @nr_macs MACs of @fmt.
+ *
+ * return index of the MAC, if it was found;
+ * return < 0 on errors, or -ENOENT if the MAC wasn't found
+ */
+static int32_t lookup_link_mac_v1(struct mtd_format_v1 *fmt,
+                                  uint32_t nr_macs,
+                                  loc_t *loc,
+                                  struct crypt_inode_info *info,
+                                  struct master_cipher_info *master)
+{
+        uint32_t pos = 0;
+
+        while (pos < nr_macs) {
+                int32_t verdict = verify_link_mac_v1(fmt, pos, loc, info, master);
+
+                if (verdict < 0)
+                        return verdict;
+                if (verdict > 0)
+                        return pos;
+                pos++;
+        }
+        return -ENOENT;
+}
+
+/*
+ * Extract version-specific part of metadata
+ *
+ * Steps:
+ * 1) sanity-check the size of @wire (@len bytes);
+ * 2) find the per-link MAC that matches @loc and remember its index
+ *    in @local->mac_idx;
+ * 3) unless @load_info is false: decrypt the encrypted part of @wire
+ *    in place, verify its MAC, load the cipher parameters to @info
+ *    and check that they are supported.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int32_t open_format_v1(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info)
+{
+ int32_t ret;
+ int32_t num_nmtd_macs;
+ struct mtd_format_v1 *fmt;
+ unsigned char mtd_key[16];
+ AES_KEY EMTD_KEY;
+ GCM128_CONTEXT *gctx;
+ uint32_t ad;
+ emtd_8_mac_t gmac;
+ struct object_cipher_info *object;
+
+ num_nmtd_macs = check_format_v1(len, wire);
+ if (num_nmtd_macs <= 0)
+ return EIO;
+ fmt = (struct mtd_format_v1 *)wire;
+
+ ret = lookup_link_mac_v1(fmt, num_nmtd_macs, loc, info, master);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed");
+ return EINVAL;
+ }
+ local->mac_idx = ret;
+ if (load_info == _gf_false)
+ /* the case of partial open */
+ return 0;
+
+ object = &info->cinfo;
+
+ ret = get_emtd_file_key(info, master, mtd_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key");
+ return ret;
+ }
+ /*
+ * decrypt encrypted meta-data
+ */
+ ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
+ return ret;
+ }
+ gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+ if (!gctx) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
+ return ENOMEM;
+ }
+ CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+ /* the loader version is authenticated as additional data */
+ ad = htole32(MTD_LOADER_V1);
+ ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ /* input and output are the same buffer: @wire is modified */
+ ret = CRYPTO_gcm128_decrypt(gctx,
+ get_EMTD_V1(fmt),
+ get_EMTD_V1(fmt),
+ SIZE_OF_EMTD_V1);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ /*
+ * verify metadata
+ */
+ CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac));
+ CRYPTO_gcm128_release(gctx);
+ if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) {
+ gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed");
+ return EINVAL;
+ }
+ /*
+ * load verified metadata to the private part of inode
+ */
+ info->nr_minor = fmt->minor_id;
+
+ object->o_alg = fmt->alg_id;
+ object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS;
+ object->o_block_bits = fmt->block_bits;
+ object->o_mode = fmt->mode_id;
+
+ return check_file_metadata(info);
+}
+
+/*
+ * perform metadata authentication against @loc->path;
+ * extract crypt-specific attributes and populate @info
+ * with them (optional)
+ *
+ * @str is the whole format string of @len bytes. Its common header
+ * selects the loader that handles the rest.
+ *
+ * Returns EIO if @str is too short for the header, EINVAL for an
+ * unknown loader id, otherwise the result of the loader.
+ */
+int32_t open_format(unsigned char *str,
+                    int32_t len,
+                    loc_t *loc,
+                    struct crypt_inode_info *info,
+                    struct master_cipher_info *master,
+                    crypt_local_t *local,
+                    gf_boolean_t load_info)
+{
+        struct crypt_format *fmt;
+        /*
+         * Reject a negative length explicitly: compared with sizeof
+         * it would be converted to a huge unsigned value and pass.
+         */
+        if (len < 0 || (size_t)len < sizeof(*fmt)) {
+                gf_log("crypt", GF_LOG_ERROR, "Bad core format");
+                return EIO;
+        }
+        fmt = (struct crypt_format *)str;
+
+        if (fmt->loader_id >= LAST_MTD_LOADER) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "Unsupported loader id %d", fmt->loader_id);
+                return EINVAL;
+        }
+        /* step over the common header */
+        str += sizeof(*fmt);
+        len -= sizeof(*fmt);
+
+        return mtd_loaders[fmt->loader_id].open_format(str,
+                                                       len,
+                                                       loc,
+                                                       info,
+                                                       master,
+                                                       local,
+                                                       load_info);
+}
+
+/*
+ * Metadata loaders, indexed by loader id; each entry holds the
+ * version-specific methods. Only the v1 loader is defined here.
+ */
+struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER] = {
+ [MTD_LOADER_V1] =
+ {.format_size = format_size_v1,
+ .create_format = create_format_v1,
+ .open_format = open_format_v1,
+ .update_format = update_format_v1
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h
new file mode 100644
index 000000000..a92f149ef
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __METADATA_H__
+#define __METADATA_H__
+
+#define NMTD_8_MAC_SIZE (8)
+#define EMTD_8_MAC_SIZE (8)
+
+typedef uint8_t nmtd_8_mac_t[NMTD_8_MAC_SIZE];
+typedef uint8_t emtd_8_mac_t[EMTD_8_MAC_SIZE] ;
+
+/*
+ * Version "v1" of file's metadata.
+ * Metadata of this version has 4 components:
+ *
+ * 1) EMTD (Encrypted part of MeTaData);
+ * 2) NMTD (Non-encrypted part of MeTaData);
+ * 3) EMTD_MAC; (EMTD Message Authentication Code);
+ * 4) Array of per-link NMTD MACs (for every (hard)link it includes
+ * exactly one MAC)
+ */
+/*
+ * NOTE: the structure is packed, and SIZE_OF_EMTD_V1 is computed as the
+ * distance between the offsets of alg_id and gmac. The EMTD fields must
+ * therefore stay contiguous, start at alg_id and precede gmac.
+ */
+struct mtd_format_v1 {
+ /* EMTD, encrypted part of meta-data */
+ uint8_t alg_id; /* cipher algorithm id (only AES for now) */
+ uint8_t mode_id; /* cipher mode id; (only XTS for now) */
+ uint8_t block_bits; /* encoded block size */
+ uint8_t minor_id; /* client translator id */
+ uint8_t dkey_factor; /* encoded size of the data key */
+ /* MACs */
+ emtd_8_mac_t gmac; /* MAC of the encrypted meta-data, 8 bytes */
+ nmtd_8_mac_t omac; /* per-link MACs of the non-encrypted
+ * meta-data: at least one such MAC is always
+ * present */
+} __attribute__((packed));
+
+/*
+ * NMTD, the non-encrypted part of metadata of version "v1"
+ * is file's gfid, which is generated on trusted machines.
+ */
+#define SIZE_OF_NMTD_V1 (sizeof(uuid_t))
+#define SIZE_OF_EMTD_V1 (offsetof(struct mtd_format_v1, gmac) - \
+ offsetof(struct mtd_format_v1, alg_id))
+#define SIZE_OF_NMTD_V1_MAC (NMTD_8_MAC_SIZE)
+#define SIZE_OF_EMTD_V1_MAC (EMTD_8_MAC_SIZE)
+
+/* Start of the EMTD inside a v1 format: the address of its first field, alg_id. */
+static inline unsigned char *get_EMTD_V1(struct mtd_format_v1 *format)
+{
+ return &format->alg_id;
+}
+
+/*
+ * NMTD of a v1 format. It is not stored in the format itself: the
+ * accessor returns @info->oid (presumably the file's gfid, see the
+ * SIZE_OF_NMTD_V1 comment -- confirm against crypt_inode_info).
+ */
+static inline unsigned char *get_NMTD_V1(struct crypt_inode_info *info)
+{
+ return info->oid;
+}
+
+/* MAC of the EMTD: the gmac field, SIZE_OF_EMTD_V1_MAC bytes long. */
+static inline unsigned char *get_EMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->gmac;
+}
+
+/*
+ * Start of the NMTD MACs: the omac field. Per the structure comment
+ * this is where the per-link MACs live (at least one is present).
+ */
+static inline unsigned char *get_NMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->omac;
+}
+
+#endif /* __METADATA_H__ */
diff --git a/xlators/encryption/rot-13/src/Makefile.am b/xlators/encryption/rot-13/src/Makefile.am
index ba5e623d8..94e8d18e7 100644
--- a/xlators/encryption/rot-13/src/Makefile.am
+++ b/xlators/encryption/rot-13/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = rot-13.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
-rot_13_la_LDFLAGS = -module -avoidversion
+rot_13_la_LDFLAGS = -module -avoid-version
rot_13_la_SOURCES = rot-13.c
rot_13_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = rot-13.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/encryption/rot-13/src/rot-13.c b/xlators/encryption/rot-13/src/rot-13.c
index 3cf925ed8..b9ac29a72 100644
--- a/xlators/encryption/rot-13/src/rot-13.c
+++ b/xlators/encryption/rot-13/src/rot-13.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <ctype.h>
#include <sys/uio.h>
@@ -32,13 +22,13 @@
#include "rot-13.h"
/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
+ * This is a rot13 ``encryption'' xlator. It rot13's data when
+ * writing to disk and rot13's it back when reading it.
* This xlator is meant as an example, NOT FOR PRODUCTION
* USE ;) (hence no error-checking)
*/
-void
+void
rot13 (char *buf, int len)
{
int i;
@@ -68,14 +58,15 @@ rot13_readv_cbk (call_frame_t *frame,
struct iovec *vector,
int32_t count,
struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
-
+
if (priv->decrypt_read)
rot13_iovec (vector, count);
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
return 0;
}
@@ -84,13 +75,13 @@ rot13_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame,
rot13_readv_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -101,9 +92,10 @@ rot13_writev_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -112,20 +104,20 @@ rot13_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
+ int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
if (priv->encrypt_write)
rot13_iovec (vector, count);
- STACK_WIND (frame,
+ STACK_WIND (frame,
rot13_writev_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset,
- iobref);
+ fd, vector, count, offset, flags,
+ iobref, xdata);
return 0;
}
@@ -136,7 +128,7 @@ init (xlator_t *this)
rot_13_private_t *priv = NULL;
if (!this->children || this->children->next) {
- gf_log ("rot13", GF_LOG_ERROR,
+ gf_log ("rot13", GF_LOG_ERROR,
"FATAL: rot13 should have exactly one child");
return -1;
}
@@ -194,15 +186,14 @@ struct xlator_fops fops = {
.writev = rot13_writev
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
- { .key = {"encrypt-write"},
+ { .key = {"encrypt-write"},
.type = GF_OPTION_TYPE_BOOL
},
- { .key = {"decrypt-read"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
},
{ .key = {NULL} },
};
diff --git a/xlators/encryption/rot-13/src/rot-13.h b/xlators/encryption/rot-13/src/rot-13.h
index 8ef8162ae..3e9fc19c7 100644
--- a/xlators/encryption/rot-13/src/rot-13.h
+++ b/xlators/encryption/rot-13/src/rot-13.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ROT_13_H__
#define __ROT_13_H__
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 809bbe510..d2f5ef192 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = locks trash quota read-only mac-compat quiesce marker#path-converter # filter
+SUBDIRS = locks quota read-only mac-compat quiesce marker index \
+ protect compress changelog gfid-access $(GLUPY_SUBDIR) qemu-block # trash path-converter # filter
CLEANFILES =
diff --git a/xlators/features/changelog/Makefile.am b/xlators/features/changelog/Makefile.am
new file mode 100644
index 000000000..153bb6850
--- /dev/null
+++ b/xlators/features/changelog/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src lib
+
+CLEANFILES =
diff --git a/xlators/protocol/legacy/lib/Makefile.am b/xlators/features/changelog/lib/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/xlators/protocol/legacy/lib/Makefile.am
+++ b/xlators/features/changelog/lib/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c
new file mode 100644
index 000000000..14562585a
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-changes.c
@@ -0,0 +1,87 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+/* print @fn (a message string) followed by the textual form of the current errno */
+#define handle_error(fn) \
+ printf ("%s (reason: %s)\n", fn, strerror (errno))
+
+/**
+ * Example consumer: register against a (hard-coded) brick, then every
+ * 10 seconds scan for new changelogs, print each changelog file name and
+ * mark it as done.
+ *
+ * Returns the registration error if registering fails and -1 if a scan
+ * fails; otherwise it loops forever.
+ */
+int
+main (int argc, char ** argv)
+{
+        int     i          = 0;
+        int     ret        = 0;
+        ssize_t nr_changes = 0;
+        ssize_t changes    = 0;
+        char    fbuf[PATH_MAX] = {0,};
+
+        /* get changes for brick "/home/vshankar/export/yow/yow-1" */
+        ret = gf_changelog_register ("/home/vshankar/export/yow/yow-1",
+                                     "/tmp/scratch", "/tmp/change.log", 9, 5);
+        if (ret) {
+                handle_error ("register failed");
+                goto out;
+        }
+
+        while (1) {
+                i = 0;
+                nr_changes = gf_changelog_scan ();
+                if (nr_changes < 0) {
+                        handle_error ("scan(): ");
+                        /* do not report success after a failed scan */
+                        ret = -1;
+                        break;
+                }
+
+                if (nr_changes == 0)
+                        goto next;
+
+                /* nr_changes is a ssize_t: %zd is the matching conversion */
+                printf ("Got %zd changelog files\n", nr_changes);
+
+                while ( (changes =
+                         gf_changelog_next_change (fbuf, PATH_MAX)) > 0) {
+                        printf ("changelog file [%d]: %s\n", ++i, fbuf);
+
+                        /* process changelog */
+                        /* ... */
+                        /* ... */
+                        /* ... */
+                        /* done processing */
+
+                        ret = gf_changelog_done (fbuf);
+                        if (ret)
+                                handle_error ("gf_changelog_done");
+                }
+
+                if (changes == -1)
+                        handle_error ("gf_changelog_next_change");
+
+        next:
+                sleep (10);
+        }
+
+ out:
+        return ret;
+}
diff --git a/xlators/features/changelog/lib/examples/python/changes.py b/xlators/features/changelog/lib/examples/python/changes.py
new file mode 100644
index 000000000..d21db8eab
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/changes.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+
+import os
+import sys
+import time
+import libgfchangelog
+
+cl = libgfchangelog.Changes()
+
+# Register against `brick` and then loop forever: scan for new changelogs,
+# print the list returned by cl_getchanges(), mark every entry as done and
+# sleep `interval` seconds before the next scan.
+# An OSError raised by any of the library wrappers ends the loop; the
+# exception is printed and the function returns.
+def get_changes(brick, scratch_dir, log_file, log_level, interval):
+ change_list = []
+ try:
+ cl.cl_register(brick, scratch_dir, log_file, log_level)
+ while True:
+ cl.cl_scan()
+ change_list = cl.cl_getchanges()
+ if change_list:
+ print change_list
+ for change in change_list:
+ print('done with %s' % (change))
+ cl.cl_done(change)
+ time.sleep(interval)
+ except OSError:
+ ex = sys.exc_info()[1]
+ print ex
+
+# Command line entry point: expects exactly four arguments (brick,
+# scratch directory, log file, fetch interval in seconds). The log level
+# is not taken from the command line; it is fixed to 9.
+if __name__ == '__main__':
+ if len(sys.argv) != 5:
+ print("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>"
+ % (sys.argv[0]))
+ sys.exit(1)
+ get_changes(sys.argv[1], sys.argv[2], sys.argv[3], 9, int(sys.argv[4]))
diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
new file mode 100644
index 000000000..68ec3baf1
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
@@ -0,0 +1,64 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+# Thin ctypes wrapper around libgfchangelog. Every method is a
+# classmethod operating on the single shared library handle below; a C
+# call returning -1 is turned into an OSError built from the C errno.
+class Changes(object):
+ # library handle; use_errno=True lets get_errno() see the C errno
+ libgfc = CDLL(find_library("gfchangelog"), use_errno=True)
+
+ # errno value saved by ctypes for the last library call
+ @classmethod
+ def geterrno(cls):
+ return get_errno()
+
+ # raise OSError(errno, strerror(errno)) for the last failed call
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ # look up the C function named `call` in the library
+ @classmethod
+ def _get_api(cls, call):
+ return getattr(cls.libgfc, call)
+
+ # wraps gf_changelog_register(); raises OSError when it returns -1
+ @classmethod
+ def cl_register(cls, brick, path, log_file, log_level, retries = 0):
+ ret = cls._get_api('gf_changelog_register')(brick, path,
+ log_file, log_level, retries)
+ if ret == -1:
+ cls.raise_oserr()
+
+ # wraps gf_changelog_scan(); the count it returns is discarded
+ @classmethod
+ def cl_scan(cls):
+ ret = cls._get_api('gf_changelog_scan')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ # wraps gf_changelog_start_fresh(); raises OSError when it returns -1
+ @classmethod
+ def cl_startfresh(cls):
+ ret = cls._get_api('gf_changelog_start_fresh')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ # Drain gf_changelog_next_change() into a list and return it sorted
+ # by the text after the last '.' of each path (compared as strings).
+ # The 4096 byte buffer size is hard-coded (see the docstring below).
+ @classmethod
+ def cl_getchanges(cls):
+ """ remove hardcoding for path name length """
+ def clsort(f):
+ return f.split('.')[-1]
+ changes = []
+ buf = create_string_buffer('\0', 4096)
+ call = cls._get_api('gf_changelog_next_change')
+
+ while True:
+ ret = call(buf, 4096)
+ # 0: no more entries, -1: error (reported after the loop)
+ if ret in (0, -1):
+ break;
+ # keep ret-1 bytes: presumably ret counts a trailing byte that
+ # is not part of the path -- confirm against the C library
+ changes.append(buf.raw[:ret-1])
+ if ret == -1:
+ cls.raise_oserr()
+ # cleanup tracker
+ cls.cl_startfresh()
+ return sorted(changes, key=clsort)
+
+ # wraps gf_changelog_done() for one changelog path
+ @classmethod
+ def cl_done(cls, clfile):
+ ret = cls._get_api('gf_changelog_done')(clfile)
+ if ret == -1:
+ cls.raise_oserr()
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
new file mode 100644
index 000000000..fbaaea628
--- /dev/null
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -0,0 +1,37 @@
+libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
+ -I../../../src/ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/features/changelog/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(GF_GLUSTERFS_LIBS)
+
+libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS)
+
+libgfchangelogdir = $(includedir)/glusterfs/gfchangelog
+lib_LTLIBRARIES = libgfchangelog.la
+
+CONTRIB_BUILDDIR = $(top_builddir)/contrib
+
+libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-process.c \
+ gf-changelog-helpers.c $(CONTRIBDIR)/uuid/clear.c \
+ $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c \
+ $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/parse.c \
+ $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c \
+ $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c \
+ $(CONTRIBDIR)/uuid/unpack.c
+
+noinst_HEADERS = gf-changelog-helpers.h $(CONTRIBDIR)/uuid/uuidd.h \
+ $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
+ $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+
+libgfchangelog_HEADERS = changelog.h
+
+CLEANFILES =
+CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/changelog/lib/src/changelog.h b/xlators/features/changelog/lib/src/changelog.h
new file mode 100644
index 000000000..5cddfb583
--- /dev/null
+++ b/xlators/features/changelog/lib/src/changelog.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+/* API set */
+
+/**
+ * Register a consumer for the changelogs of the brick at @brick_path.
+ * @scratch_dir is a working directory for the library, @log_file and
+ * @log_level configure its logging and @max_reconnects bounds the
+ * connection retries. The bundled examples treat a non-zero return
+ * (-1, with errno set) as failure.
+ */
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+                       char *log_file, int log_level, int max_reconnects);
+
+/**
+ * Scan for new changelogs. The bundled C example treats the return
+ * value as the number of changelog files found and a negative value as
+ * an error.
+ */
+ssize_t
+gf_changelog_scan (void);
+
+/**
+ * Restart the iteration done by gf_changelog_next_change() (the python
+ * example calls it "cleanup tracker"). The examples treat -1 as failure.
+ */
+int
+gf_changelog_start_fresh (void);
+
+/**
+ * Store the path of the next changelog file in @bufptr (at most @maxlen
+ * bytes). The examples keep calling it while the return value is
+ * positive; 0 ends the iteration and -1 is an error.
+ */
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen);
+
+/**
+ * Tell the library that the changelog @file has been consumed.
+ * The examples treat a non-zero return (-1) as failure.
+ */
+int
+gf_changelog_done (char *file);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
new file mode 100644
index 000000000..1eef8bf04
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
@@ -0,0 +1,180 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-mem-types.h"
+#include "gf-changelog-helpers.h"
+
+/* plain read(2) wrapper: the result of read() is returned unchanged */
+ssize_t gf_changelog_read_path (int fd, char *buffer, size_t bufsize)
+{
+ return read (fd, buffer, bufsize);
+}
+
+/**
+ * Write @len bytes from @buffer to @fd, retrying after short writes.
+ *
+ * Returns the number of bytes actually written. The loop stops early
+ * when write() fails or returns 0, so callers detect an error by
+ * comparing the result with @len.
+ */
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t writen = 0;
+
+ while (writen < len) {
+ size = write (fd,
+ buffer + writen, len - writen);
+ if (size <= 0)
+ break;
+
+ writen += size;
+ }
+
+ return writen;
+}
+
+/**
+ * Encode the NUL terminated string @s into @enc.
+ *
+ * @estr is a lookup table indexed by byte value: a non-zero entry is
+ * emitted in place of the input byte, a zero entry makes the byte come
+ * out as "%XX" (two upper-case hex digits).
+ *
+ * @enc must be provided by the caller and be large enough: up to three
+ * output bytes per input byte plus the terminating NUL written by
+ * sprintf(). For an empty @s nothing at all is written to @enc.
+ */
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr)
+{
+ for (; *s; s++) {
+ if (estr[*s])
+ sprintf(enc, "%c", estr[*s]);
+ else
+ sprintf(enc, "%%%02X", *s);
+ /* advance enc to the NUL that sprintf() just wrote */
+ while (*++enc);
+ }
+}
+
+/**
+ * thread safe version of readline with buffering
+ * (taken from Unix Network Programming Volume I, W.R. Stevens)
+ *
+ * This is favoured over fgets() as we'd need to ftruncate()
+ * (see gf_changelog_scan() API) to record new changelog files.
+ * stream functions do not have a truncate like api (although
+ * that can be done via @fflush(fp), @ftruncate(fd) and @fseek(fp),
+ * but this involves mixing POSIX file descriptors and stream FILE *).
+ *
+ * NOTE: This implementation still does not work with more than one fd
+ * used to perform gf_readline(). For this very reason it's not
+ * made a part of libglusterfs.
+ */
+
+static pthread_key_t rl_key;
+static pthread_once_t rl_once = PTHREAD_ONCE_INIT;
+
+/* destructor registered with rl_key: releases a per-thread buffer */
+static void
+readline_destructor (void *ptr)
+{
+ GF_FREE (ptr);
+}
+
+/*
+ * pthread_once() callback: creates the key under which each thread
+ * stores its buffer. The result of pthread_key_create() is not checked.
+ */
+static void
+readline_once (void)
+{
+ pthread_key_create (&rl_key, readline_destructor);
+}
+
+/**
+ * Hand out one buffered byte from @tsd through @ptr, refilling the
+ * buffer from @fd with a single read() once it has been drained.
+ *
+ * Returns 1 when a byte was stored, 0 on end of file and -1 when
+ * read() fails.
+ */
+static ssize_t
+my_read (read_line_t *tsd, int fd, char *ptr)
+{
+        if (tsd->rl_cnt <= 0) {
+                /* nothing buffered: fetch the next chunk */
+                tsd->rl_cnt = read (fd, tsd->rl_buf, MAXLINE);
+                if (tsd->rl_cnt < 0)
+                        return -1;
+                if (tsd->rl_cnt == 0)
+                        return 0;
+                tsd->rl_bufptr = tsd->rl_buf;
+        }
+
+        *ptr = *tsd->rl_bufptr;
+        tsd->rl_bufptr++;
+        tsd->rl_cnt--;
+
+        return 1;
+}
+
+/**
+ * Fetch the calling thread's read_line_t, allocating and registering
+ * it under rl_key on first use.
+ *
+ * On success 0 is returned and *@tsd points to the buffer. On failure
+ * -1 is returned; if registering a fresh buffer fails it is released
+ * again and *@tsd is reset to NULL, so nothing leaks and no caller can
+ * pick up a buffer that the key destructor does not know about.
+ */
+static int
+gf_readline_init_once (read_line_t **tsd)
+{
+        if (pthread_once (&rl_once, readline_once) != 0)
+                return -1;
+
+        *tsd = pthread_getspecific (rl_key);
+        if (*tsd)
+                return 0;
+
+        *tsd = GF_CALLOC (1, sizeof (**tsd),
+                          gf_changelog_mt_libgfchangelog_rl_t);
+        if (!*tsd)
+                return -1;
+
+        if (pthread_setspecific (rl_key, *tsd) != 0) {
+                GF_FREE (*tsd);
+                *tsd = NULL;
+                return -1;
+        }
+
+        return 0;
+}
+
+/**
+ * Read one line from @fd into @vptr through the calling thread's
+ * buffer.
+ *
+ * At most @maxlen - 1 bytes are stored and the result is always NUL
+ * terminated; a '\n' ends the line and is kept in the buffer.
+ *
+ * Returns the value of the loop counter when the line is complete, the
+ * number of bytes stored so far when end of file is hit, and -1 on a
+ * read error, when the per-thread buffer cannot be set up, or when the
+ * destination is unusable (@vptr NULL or @maxlen 0).
+ */
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen)
+{
+        size_t       n   = 0;
+        ssize_t      rc  = 0;   /* my_read() returns -1, 0 or 1 */
+        char         c   = ' ';
+        char        *ptr = vptr;
+        read_line_t *tsd = NULL;
+
+        /* without room for the terminating NUL nothing may be written */
+        if (!vptr || maxlen == 0)
+                return -1;
+
+        if (gf_readline_init_once (&tsd))
+                return -1;
+
+        for (n = 1; n < maxlen; n++) {
+                rc = my_read (tsd, fd, &c);
+                if (rc == 1) {
+                        *ptr++ = c;
+                        if (c == '\n')
+                                break;
+                } else if (rc == 0) {
+                        /* end of file: n - 1 bytes were stored */
+                        *ptr = '\0';
+                        return (n - 1);
+                } else
+                        return -1;
+        }
+
+        *ptr = '\0';
+        return n;
+}
+
+/**
+ * lseek() for descriptors read through gf_readline(): after the file
+ * offset has been moved, whatever the calling thread still has buffered
+ * is dropped.
+ *
+ * Returns the new offset, or -1 when the per-thread buffer cannot be
+ * obtained or lseek() fails.
+ */
+off_t
+gf_lseek (int fd, off_t offset, int whence)
+{
+        read_line_t *rl  = NULL;
+        off_t        pos = 0;
+
+        if (gf_readline_init_once (&rl) != 0)
+                return -1;
+
+        pos = lseek (fd, offset, whence);
+        if (pos == -1)
+                return -1;
+
+        /* forget buffered data: it belongs to the old position */
+        rl->rl_bufptr = rl->rl_buf;
+        rl->rl_cnt    = 0;
+
+        return pos;
+}
+
+/**
+ * ftruncate() for descriptors read through gf_readline(): truncate @fd
+ * to @length bytes and drop whatever the calling thread still has
+ * buffered.
+ *
+ * Returns 0 on success, -1 when the per-thread buffer cannot be
+ * obtained or ftruncate() fails.
+ */
+int
+gf_ftruncate (int fd, off_t length)
+{
+        read_line_t *tsd = NULL;
+
+        if (gf_readline_init_once (&tsd))
+                return -1;
+
+        /* honour the requested length instead of always truncating to 0 */
+        if (ftruncate (fd, length))
+                return -1;
+
+        tsd->rl_cnt = 0;
+        tsd->rl_bufptr = tsd->rl_buf;
+
+        return 0;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
new file mode 100644
index 000000000..3aa6ed7b8
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_HELPERS_H
+#define _GF_CHANGELOG_HELPERS_H
+
+#include <unistd.h>
+#include <dirent.h>
+#include <limits.h>
+#include <pthread.h>
+
+#include <xlator.h>
+
+#define GF_CHANGELOG_TRACKER "tracker"
+
+#define GF_CHANGELOG_CURRENT_DIR ".current"
+#define GF_CHANGELOG_PROCESSED_DIR ".processed"
+#define GF_CHANGELOG_PROCESSING_DIR ".processing"
+
+#ifndef MAXLINE
+#define MAXLINE 4096
+#endif
+
+/*
+ * Append @len bytes from @ptr to the buffer @ascii at offset @off and
+ * advance @off (which must be an lvalue) by @len. No bounds checking is
+ * done: the caller has to make sure @ascii is large enough. The
+ * arguments are evaluated more than once.
+ */
+#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) do { \
+ memcpy (ascii + off, ptr, len); \
+ off += len; \
+ } while (0)
+
+/* buffering state used by gf_readline() and friends */
+typedef struct read_line {
+ int rl_cnt; /* bytes of rl_buf not handed out yet */
+ char *rl_bufptr; /* next byte of rl_buf to hand out */
+ char rl_buf[MAXLINE]; /* data fetched by the last read() */
+} read_line_t;
+
+/* state of one changelog consumer */
+typedef struct gf_changelog {
+ xlator_t *this;
+
+ /* 'processing' directory stream */
+ DIR *gfc_dir;
+
+ /* fd to the tracker file */
+ int gfc_fd;
+
+ /* connection retries */
+ int gfc_connretries;
+
+ char gfc_sockpath[PATH_MAX];
+
+ char gfc_brickpath[PATH_MAX];
+
+ /* socket for receiving notifications */
+ int gfc_sockfd;
+
+ char *gfc_working_dir;
+
+ /* RFC 3986 string encoding */
+ char rfc3986[256];
+
+ /* directories, presumably below gfc_working_dir (see the
+ * GF_CHANGELOG_*_DIR names) -- confirm against the setup code */
+ char gfc_current_dir[PATH_MAX];
+ char gfc_processed_dir[PATH_MAX];
+ char gfc_processing_dir[PATH_MAX];
+
+ pthread_t gfc_changelog_processor;
+} gf_changelog_t;
+
+int
+gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc);
+
+void *
+gf_changelog_process (void *data);
+
+ssize_t
+gf_changelog_read_path (int fd, char *buffer, size_t bufsize);
+
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr);
+
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len);
+
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen);
+
+int
+gf_ftruncate (int fd, off_t length);
+
+off_t
+gf_lseek (int fd, off_t offset, int whence);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-process.c b/xlators/features/changelog/lib/src/gf-changelog-process.c
new file mode 100644
index 000000000..df7204931
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-process.c
@@ -0,0 +1,571 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <pthread.h>
+
+#include "uuid.h"
+#include "globals.h"
+#include "glusterfs.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+
+extern int byebye;
+
+/**
+ * number of gfid records after fop number
+ *
+ * Indexed by fop; fops that are not listed have 0 records. The array
+ * only extends up to the largest listed fop, so it must not be indexed
+ * with arbitrary fop numbers.
+ */
+int nr_gfids[] = {
+ [GF_FOP_MKNOD] = 1,
+ [GF_FOP_MKDIR] = 1,
+ [GF_FOP_UNLINK] = 1,
+ [GF_FOP_RMDIR] = 1,
+ [GF_FOP_SYMLINK] = 1,
+ [GF_FOP_RENAME] = 2,
+ [GF_FOP_LINK] = 1,
+ [GF_FOP_CREATE] = 1,
+};
+
+/* textual form of a binary @uuid, as produced by uuid_utoa() */
+static char *
+binary_to_ascii (uuid_t uuid)
+{
+ return uuid_utoa (uuid);
+}
+
+/* identity conversion: used with PARSE_GFID when the input is already text */
+static char *
+conv_noop (char *ptr) { return ptr; }
+
+/*
+ * Parser helpers. Several of them contain a bare `break': it leaves the
+ * innermost switch or loop surrounding the place where the macro is
+ * used, after setting the caller's error flag.
+ */
+
+/* set @perr and break unless the byte at @ptr + @plen is a NUL */
+#define VERIFY_SEPARATOR(ptr, plen, perr) \
+ { \
+ if (*(ptr + plen) != '\0') { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+/* advance the cursor @mover by @bytes and shrink @nleft accordingly */
+#define MOVER_MOVE(mover, nleft, bytes) \
+ { \
+ mover += bytes; \
+ nleft -= bytes; \
+ } \
+
+/*
+ * check for a NUL at @mov + @le, then convert @mov with @fn into @ptr;
+ * a missing separator or a NULL result sets @perr and breaks
+ */
+#define PARSE_GFID(mov, ptr, le, fn, perr) \
+ { \
+ VERIFY_SEPARATOR (mov, le, perr); \
+ ptr = fn (mov); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+/*
+ * append the string @pt to @buf at offset @of, then advance the cursor
+ * @mo by @le bytes (note: strlen (pt) bytes are copied, @le are skipped)
+ */
+#define FILL_AND_MOVE(pt, buf, of, mo, nl, le) \
+ { \
+ GF_CHANGELOG_FILL_BUFFER (pt, buf, of, strlen (pt)); \
+ MOVER_MOVE (mo, nl, le); \
+ }
+
+
+/*
+ * copy sizeof (uuid_t) bytes from @mover into @uuid, convert them to
+ * text in @ptr and advance the cursor past them; a NULL conversion
+ * result sets @perr and breaks
+ */
+#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \
+ { \
+ memcpy (uuid, mover, sizeof (uuid_t)); \
+ ptr = binary_to_ascii (uuid); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ MOVER_MOVE (mover, nleft, sizeof (uuid_t)); \
+ } \
+
+#define LINE_BUFSIZE 3*PATH_MAX /* enough buffer for extra chars too */
+
+/**
+ * using mmap() makes parsing easy. fgets() cannot be used here as
+ * the binary gfid could contain a line-feed (0x0A), in that case fgets()
+ * would read an incomplete line and parsing would fail. using POSIX fds
+ * would result in additional code to maintain state in case of partial
+ * reads of data (where multiple entries do not fit entirely in the buffer).
+ *
+ * mmap() gives the flexibility of pointing to an offset in the file
+ * without us worrying about reading it in memory (VM does that for us for
+ * free).
+ */
+
+/**
+ * binary decoder: convert the binary changelog open on @from_fd into
+ * text lines written to @to_fd.
+ *
+ * The file is mapped as a whole and parsed starting at @start_offset.
+ * Each record is a type byte ('D', 'M' or 'E') followed by a binary
+ * gfid; 'E' records carry trailing data up to a '\n'. Every record is
+ * followed by one separator byte. The line written for a record is
+ * "<type> <gfid as text><trailing data>\n".
+ *
+ * Returns 0 when the whole file was converted, -1 on a mapping, parse
+ * or write error. Malformed or truncated input is reported as a parse
+ * error instead of being read past the end of the mapping.
+ */
+static int
+gf_changelog_parse_binary (xlator_t *this,
+                           gf_changelog_t *gfc, int from_fd, int to_fd,
+                           size_t start_offset, struct stat *stbuf)
+
+{
+        int     ret           = -1;
+        off_t   off           = 0;
+        off_t   nleft         = 0;
+        uuid_t  uuid          = {0,};
+        char   *ptr           = NULL;
+        char   *bname_start   = NULL;
+        char   *bname_end     = NULL;
+        char   *mover         = NULL;
+        char   *start         = NULL;
+        char    current_mover = ' ';
+        size_t  blen          = 0;
+        int     parse_err     = 0;
+        int     write_err     = 0;
+        char    ascii[LINE_BUFSIZE] = {0,};
+
+        nleft = stbuf->st_size;
+
+        /* mmap() reports failure with MAP_FAILED, never with NULL */
+        start = (char *) mmap (NULL, nleft,
+                               PROT_READ, MAP_PRIVATE, from_fd, 0);
+        if (start == MAP_FAILED) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "mmap() error (reason: %s)", strerror (errno));
+                goto out;
+        }
+
+        mover = start;
+
+        MOVER_MOVE (mover, nleft, start_offset);
+
+        while (nleft > 0) {
+
+                off = blen = 0;
+                ptr = bname_start = bname_end = NULL;
+
+                current_mover = *mover;
+
+                switch (current_mover) {
+                case 'D':
+                case 'M':
+                        MOVER_MOVE (mover, nleft, 1);
+                        /* a whole gfid must still be inside the mapping */
+                        if (nleft < (off_t) sizeof (uuid_t)) {
+                                parse_err = 1;
+                                break;
+                        }
+                        PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+                        break;
+
+                case 'E':
+                        MOVER_MOVE (mover, nleft, 1);
+                        if (nleft < (off_t) sizeof (uuid_t)) {
+                                parse_err = 1;
+                                break;
+                        }
+                        PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+                        /*
+                         * look for the terminating '\n' within the mapped
+                         * data only; as with strchr(), a NUL in front of
+                         * it counts as "not found"
+                         */
+                        bname_start = mover;
+                        bname_end = memchr (mover, '\n', nleft);
+                        if ((bname_end == NULL) ||
+                            (memchr (bname_start, '\0',
+                                     bname_end - bname_start) != NULL)) {
+                                parse_err = 1;
+                                break;
+                        }
+
+                        blen = bname_end - bname_start;
+                        MOVER_MOVE (mover, nleft, blen);
+
+                        break;
+
+                default:
+                        parse_err = 1;
+                }
+
+                if (parse_err)
+                        break;
+
+                /* type + ' ' + gfid + trailing data + '\n' has to fit */
+                if ((strlen (ptr) + blen + 3) > sizeof (ascii)) {
+                        parse_err = 1;
+                        break;
+                }
+
+                GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+                GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+                GF_CHANGELOG_FILL_BUFFER (ptr, ascii, off, strlen (ptr));
+                if (blen)
+                        GF_CHANGELOG_FILL_BUFFER (bname_start,
+                                                  ascii, off, blen);
+                GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+                if (gf_changelog_write (to_fd, ascii, off) != off) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "processing binary changelog failed due to "
+                                " error in writing ascii change (reason: %s)",
+                                strerror (errno));
+                        write_err = 1;
+                        break;
+                }
+
+                /* skip the record separator */
+                MOVER_MOVE (mover, nleft, 1);
+        }
+
+        if ( (nleft == 0) && (!parse_err) && (!write_err))
+                ret = 0;
+
+        if (munmap (start, stbuf->st_size))
+                gf_log (this->name, GF_LOG_ERROR,
+                        "munmap() error (reason: %s)", strerror (errno));
+ out:
+        return ret;
+}
+
+/**
+ * ascii decoder:
+ * - separate out one entry from another
+ * - use fop name rather than fop number
+ *
+ * The changelog open on @from_fd is mapped as a whole and parsed
+ * starting at @start_offset; one text line per record is written to
+ * @to_fd. The fields of a record are NUL separated: 'D' and 'M' records
+ * carry a gfid in canonical form, 'E' records carry a gfid, a fop
+ * number and nr_gfids[fop] further strings, which are written RFC 3986
+ * encoded (using @gfc->rfc3986).
+ *
+ * Returns 0 when the whole file was converted, -1 on a mapping, parse
+ * or write error. Truncated fields, unknown fop numbers and records
+ * that do not fit the line buffer are reported as parse errors.
+ */
+static int
+gf_changelog_parse_ascii (xlator_t *this,
+                          gf_changelog_t *gfc, int from_fd, int to_fd,
+                          size_t start_offset, struct stat *stbuf)
+{
+        int         ng            = 0;
+        int         ret           = -1;
+        int         fop           = 0;
+        int         len           = 0;
+        off_t       off           = 0;
+        off_t       nleft         = 0;
+        char       *ptr           = NULL;
+        char       *eptr          = NULL;
+        char       *start         = NULL;
+        char       *mover         = NULL;
+        int         parse_err     = 0;
+        int         write_err     = 0;
+        char        current_mover = ' ';
+        char        ascii[LINE_BUFSIZE] = {0,};
+        const char *fopname       = NULL;
+        const int   nr_gfids_max  = sizeof (nr_gfids) / sizeof (nr_gfids[0]);
+
+        nleft = stbuf->st_size;
+
+        /* mmap() reports failure with MAP_FAILED, never with NULL */
+        start = (char *) mmap (NULL, nleft,
+                               PROT_READ, MAP_PRIVATE, from_fd, 0);
+        if (start == MAP_FAILED) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "mmap() error (reason: %s)", strerror (errno));
+                goto out;
+        }
+
+        mover = start;
+
+        MOVER_MOVE (mover, nleft, start_offset);
+
+        while (nleft > 0) {
+                off = 0;
+                current_mover = *mover;
+
+                GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+                GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+                switch (current_mover) {
+                case 'D':
+                case 'M':
+                        MOVER_MOVE (mover, nleft, 1);
+
+                        /* gfid plus its separator must be inside the map */
+                        if (nleft < (UUID_CANONICAL_FORM_LEN + 1)) {
+                                parse_err = 1;
+                                break;
+                        }
+
+                        /* target gfid */
+                        PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+                                    conv_noop, parse_err);
+                        FILL_AND_MOVE(ptr, ascii, off,
+                                      mover, nleft, UUID_CANONICAL_FORM_LEN);
+                        break;
+
+                case 'E':
+                        MOVER_MOVE (mover, nleft, 1);
+
+                        if (nleft < (UUID_CANONICAL_FORM_LEN + 1)) {
+                                parse_err = 1;
+                                break;
+                        }
+
+                        /* target gfid */
+                        PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+                                    conv_noop, parse_err);
+                        FILL_AND_MOVE (ptr, ascii, off,
+                                       mover, nleft, UUID_CANONICAL_FORM_LEN);
+                        FILL_AND_MOVE (" ", ascii, off,
+                                       mover, nleft, 1);
+
+                        /* fop: the field must be terminated in the map */
+                        if ((nleft <= 0) ||
+                            (memchr (mover, '\0', nleft) == NULL)) {
+                                parse_err = 1;
+                                break;
+                        }
+                        len = strlen (mover);
+                        VERIFY_SEPARATOR (mover, len, parse_err);
+
+                        /* never index the fop tables with an unknown fop */
+                        fop = atoi (mover);
+                        if ((fop < 0) || (fop >= GF_FOP_MAXVALUE)) {
+                                parse_err = 1;
+                                break;
+                        }
+                        if ( (fopname = gf_fop_list[fop]) == NULL) {
+                                parse_err = 1;
+                                break;
+                        }
+
+                        MOVER_MOVE (mover, nleft, len);
+
+                        len = strlen (fopname);
+                        GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len);
+
+                        /* pargfid + bname */
+                        ng = (fop < nr_gfids_max) ? nr_gfids[fop] : 0;
+                        while (ng-- > 0) {
+                                MOVER_MOVE (mover, nleft, 1);
+                                if ((nleft <= 0) ||
+                                    (memchr (mover, '\0', nleft) == NULL)) {
+                                        parse_err = 1;
+                                        break;
+                                }
+                                len = strlen (mover);
+                                GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+                                PARSE_GFID (mover, ptr, len,
+                                            conv_noop, parse_err);
+
+                                /* worst case: "%XX" per byte, plus the NUL */
+                                eptr = calloc (1, (3 * strlen (ptr)) + 1);
+                                if (!eptr) {
+                                        parse_err = 1;
+                                        break;
+                                }
+
+                                gf_rfc3986_encode ((unsigned char *) ptr,
+                                                   eptr, gfc->rfc3986);
+
+                                /* encoded name + final '\n' has to fit */
+                                if (((size_t) off + strlen (eptr) + 1) >
+                                    sizeof (ascii)) {
+                                        free (eptr);
+                                        parse_err = 1;
+                                        break;
+                                }
+
+                                FILL_AND_MOVE (eptr, ascii, off,
+                                               mover, nleft, len);
+                                free (eptr);
+                        }
+
+                        break;
+                default:
+                        parse_err = 1;
+                }
+
+                if (parse_err)
+                        break;
+
+                GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+                if (gf_changelog_write (to_fd, ascii, off) != off) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "processing ascii changelog failed due to "
+                                " error in writing change (reason: %s)",
+                                strerror (errno));
+                        write_err = 1;
+                        break;
+                }
+
+                /* skip the record separator */
+                MOVER_MOVE (mover, nleft, 1);
+
+        }
+
+        if ( (nleft == 0) && (!parse_err) && (!write_err))
+                ret = 0;
+
+        if (munmap (start, stbuf->st_size))
+                gf_log (this->name, GF_LOG_ERROR,
+                        "munmap() error (reason: %s)", strerror (errno));
+
+ out:
+        return ret;
+}
+
+#define COPY_BUFSIZE 8192
+/**
+ * Copy everything that can still be read from @from_fd to @to_fd, in
+ * chunks of up to COPY_BUFSIZE bytes.
+ *
+ * Returns 0 once end of file is reached, -1 when read() fails or a
+ * chunk cannot be written completely.
+ */
+static int
+gf_changelog_copy (xlator_t *this, int from_fd, int to_fd)
+{
+        char    chunk[COPY_BUFSIZE+1] = {0,};
+        ssize_t nread                 = 0;
+
+        for (;;) {
+                nread = read (from_fd, chunk, COPY_BUFSIZE);
+                if (nread < 0)
+                        return -1;
+                if (nread == 0)
+                        return 0;
+
+                if (gf_changelog_write (to_fd, chunk, nread) != nread) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "error processing ascii changlog");
+                        return -1;
+                }
+        }
+}
+
+/**
+ * Detect the encoding of the changelog open on @from_fd and convert it
+ * into @to_fd with the matching decoder.
+ *
+ * @stbuf holds the stat data of the source file. *@zerob is set to 1
+ * when the file consists of the header only; the function still
+ * returns -1 in that case, so callers have to look at *@zerob to tell
+ * an empty changelog from a failure.
+ *
+ * Returns the result of the selected decoder, or -1 when the encoding
+ * is unknown or invalid.
+ */
+static int
+gf_changelog_decode (xlator_t *this, gf_changelog_t *gfc, int from_fd,
+ int to_fd, struct stat *stbuf, int *zerob)
+{
+ int ret = -1;
+ int encoding = -1;
+ size_t elen = 0;
+ char buffer[1024] = {0,};
+
+ /* presumably reads the header into buffer and yields the encoding
+ * and the header length (elen) -- confirm in changelog-misc.h */
+ CHANGELOG_GET_ENCODING (from_fd, buffer, 1024, encoding, elen);
+ if (encoding == -1) /* unknown encoding */
+ goto out;
+
+ if (!CHANGELOG_VALID_ENCODING (encoding))
+ goto out;
+
+ /* nothing but the header: flag it, ret stays -1 */
+ if (elen == stbuf->st_size) {
+ *zerob = 1;
+ goto out;
+ }
+
+ /**
+ * start processing after the header
+ */
+ /* the result of lseek() is not checked; the two parsers below map
+ * the file and skip the header through their start offset instead */
+ lseek (from_fd, elen, SEEK_SET);
+
+ switch (encoding) {
+ case CHANGELOG_ENCODE_BINARY:
+ /**
+ * this ideally should have been a part of changelog-encoders.c
+ * (ie. part of the changelog translator).
+ */
+ ret = gf_changelog_parse_binary (this, gfc, from_fd,
+ to_fd, elen, stbuf);
+ break;
+
+ case CHANGELOG_ENCODE_ASCII:
+ ret = gf_changelog_parse_ascii (this, gfc, from_fd,
+ to_fd, elen, stbuf);
+ break;
+ default:
+ ret = gf_changelog_copy (this, from_fd, to_fd);
+ }
+
+ out:
+ return ret;
+}
+
+/**
+ * Convert one changelog file into its consumable form.
+ *
+ * The file at @from_path is decoded into a file of the same basename
+ * under gfc->gfc_current_dir. On a successful decode the result is
+ * renamed into gfc->gfc_processing_dir. When the decoder flags the
+ * changelog as empty (header only) the file created under the current
+ * directory is unlinked instead, and the unlink result is returned.
+ *
+ * Returns 0 on success and non-zero on failure. A source that cannot
+ * be stat()ed, is not a regular file, or cannot be opened, and a
+ * destination that cannot be created, are all reported as -1.
+ */
+static int
+gf_changelog_consume (xlator_t *this, gf_changelog_t *gfc, char *from_path)
+{
+        int         ret   = -1;
+        int         fd1   = 0;
+        int         fd2   = 0;
+        int         zerob = 0;
+        struct stat stbuf = {0,};
+        char        dest[PATH_MAX]    = {0,};
+        char        to_path[PATH_MAX] = {0,};
+
+        ret = stat (from_path, &stbuf);
+        if (ret || !S_ISREG(stbuf.st_mode)) {
+                /* stat() may have succeeded on a non-regular file */
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "stat failed on changelog file: %s", from_path);
+                goto out;
+        }
+
+        fd1 = open (from_path, O_RDONLY);
+        if (fd1 < 0) {
+                /* 'ret' still holds stat()'s 0: report the failure */
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "cannot open changelog file: %s (reason: %s)",
+                        from_path, strerror (errno));
+                goto out;
+        }
+
+        (void) snprintf (to_path, PATH_MAX, "%s%s",
+                         gfc->gfc_current_dir, basename (from_path));
+        (void) snprintf (dest, PATH_MAX, "%s%s",
+                         gfc->gfc_processing_dir, basename (from_path));
+
+        fd2 = open (to_path, O_CREAT | O_TRUNC | O_RDWR,
+                    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (fd2 < 0) {
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "cannot create ascii changelog file %s (reason %s)",
+                        to_path, strerror (errno));
+                goto close_fd;
+        } else {
+                ret = gf_changelog_decode (this, gfc, fd1,
+                                           fd2, &stbuf, &zerob);
+
+                close (fd2);
+
+                if (!ret) {
+                        /* move it to processing on a successful
+                           decode */
+                        ret = rename (to_path, dest);
+                        if (ret)
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "error moving %s to processing dir"
+                                        " (reason: %s)", to_path,
+                                        strerror (errno));
+                }
+
+                /* remove it from .current if it's an empty file */
+                if (zerob) {
+                        ret = unlink (to_path);
+                        if (ret)
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "could not unlink %s (reason: %s)",
+                                        to_path, strerror (errno));
+                }
+        }
+
+ close_fd:
+        close (fd1);
+
+ out:
+        return ret;
+}
+
+/**
+ * Walk the first @readlen bytes at @path, which hold NUL-terminated
+ * changelog paths laid out back to back, and consume every complete
+ * (terminated) path.
+ *
+ * Returns a pointer to the first byte that is not part of a consumed
+ * path (the start of a trailing partial path, or the end of the data),
+ * or NULL as soon as consuming a path fails.
+ */
+static char *
+gf_changelog_ext_change (xlator_t *this,
+                         gf_changelog_t *gfc, char *path, size_t readlen)
+{
+        size_t  idx    = 0;
+        char   *cursor = path;
+
+        for (idx = 0; idx < readlen; idx++, cursor++) {
+                if (*cursor != '\0')
+                        continue;
+
+                /* [path, cursor] is one complete file name */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "processing changelog: %s", path);
+                if (gf_changelog_consume (this, gfc, path))
+                        return NULL;
+
+                /* the next name starts right after the terminator */
+                path = cursor + 1;
+        }
+
+        return path;
+}
+
+/**
+ * Thread routine: receive changelog file names over gfc->gfc_sockfd and
+ * consume them.
+ *
+ * @data is the gf_changelog_t of the registered consumer. The thread
+ * detaches itself and loops until the peer closes the socket and no
+ * reconnect succeeds; it then sets the global 'byebye' flag and returns
+ * NULL.
+ */
+void *
+gf_changelog_process (void *data)
+{
+ ssize_t len = 0;
+ ssize_t offlen = 0;
+ xlator_t *this = NULL;
+ char *sbuf = NULL;
+ gf_changelog_t *gfc = NULL;
+ char from_path[PATH_MAX] = {0,};
+
+ gfc = (gf_changelog_t *) data;
+ this = gfc->this;
+
+ pthread_detach (pthread_self());
+
+ for (;;) {
+ /* append to whatever partial path was carried over (offlen bytes) */
+ len = gf_changelog_read_path (gfc->gfc_sockfd,
+ from_path + offlen,
+ PATH_MAX - offlen);
+ if (len < 0)
+ continue; /* ignore it for now */
+
+ if (len == 0) { /* close() from the changelog translator */
+ gf_log (this->name, GF_LOG_INFO, "close from changelog"
+ " notification translator.");
+
+ /* a reconnect is attempted unless gfc_connretries is 1;
+ a zero return from the init routine resumes reading */
+ if (gfc->gfc_connretries != 1) {
+ if (!gf_changelog_notification_init(this, gfc))
+ continue;
+ }
+
+ byebye = 1;
+ break;
+ }
+
+ len += offlen;
+ /* consume every complete path; sbuf points at the leftover */
+ sbuf = gf_changelog_ext_change (this, gfc, from_path, len);
+ if (!sbuf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not extract changelog filename");
+ continue;
+ }
+
+ /* move an incomplete trailing path to the front of the buffer
+ so that the next read completes it */
+ offlen = 0;
+ if (sbuf != (from_path + len)) {
+ offlen = from_path + len - sbuf;
+ memmove (from_path, sbuf, offlen);
+ }
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "byebye (%d) from processing thread...", byebye);
+ return NULL;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
new file mode 100644
index 000000000..ca8e373e7
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog.c
@@ -0,0 +1,515 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <string.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "logging.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+/* set to 1 by gf_changelog_process() when it stops reading from the
+ * changelog socket; gf_changelog_scan() then fails with ECONNREFUSED */
+int byebye = 0;
+
+/**
+ * Release what a gf_changelog_t holds: the notification socket, the
+ * tracker file descriptor, the processing directory stream and the
+ * working directory string. @gfc itself is not freed.
+ */
+static void
+gf_changelog_cleanup (gf_changelog_t *gfc)
+{
+        int       i     = 0;
+        /* socket first, then the tracker fd; -1 means "not open" */
+        const int fds[] = { gfc->gfc_sockfd, gfc->gfc_fd };
+
+        for (i = 0; i < 2; i++) {
+                if (fds[i] != -1)
+                        close (fds[i]);
+        }
+
+        /* processing dir */
+        if (gfc->gfc_dir != NULL)
+                closedir (gfc->gfc_dir);
+
+        /* allocated by realpath; free (NULL) is a no-op */
+        free (gfc->gfc_working_dir);
+}
+
+/**
+ * Library constructor: create a glusterfs context, initialise the
+ * glusterfs globals with it and attach it to THIS. If either step
+ * fails, THIS->ctx is left untouched.
+ */
+void
+__attribute__ ((constructor)) gf_changelog_ctor (void)
+{
+        glusterfs_ctx_t *new_ctx = glusterfs_ctx_new ();
+
+        if (new_ctx == NULL)
+                return;
+
+        if (glusterfs_globals_init (new_ctx) != 0) {
+                free (new_ctx);
+                return;
+        }
+
+        THIS->ctx = new_ctx;
+}
+
+/**
+ * Library destructor: tear down the consumer state hanging off
+ * THIS->private (if any), then destroy the context lock and free the
+ * context attached to THIS.
+ */
+void
+__attribute__ ((destructor)) gf_changelog_dtor (void)
+{
+        xlator_t        *this = THIS;
+        gf_changelog_t  *gfc  = NULL;
+        glusterfs_ctx_t *ctx  = NULL;
+
+        if (this == NULL)
+                return;
+
+        gfc = this->private;
+        ctx = this->ctx;
+
+        if (gfc != NULL) {
+                gf_changelog_cleanup (gfc);
+                GF_FREE (gfc);
+        }
+
+        if (ctx != NULL) {
+                pthread_mutex_destroy (&ctx->lock);
+                free (ctx);
+        }
+}
+
+
+/**
+ * Create the current, processed and processing directories below
+ * gfc->gfc_working_dir, open the processing directory (stored in
+ * gfc->gfc_dir) and open/create the tracker file (stored in
+ * gfc->gfc_fd).
+ *
+ * Returns 0 on success, non-zero on failure. On failure gfc->gfc_dir
+ * is never left pointing at a closed stream, so the caller may run
+ * gf_changelog_cleanup() safely.
+ */
+static int
+gf_changelog_open_dirs (gf_changelog_t *gfc)
+{
+        int  ret        = -1;
+        DIR *dir        = NULL;
+        int  tracker_fd = 0;
+        char tracker_path[PATH_MAX] = {0,};
+
+        (void) snprintf (gfc->gfc_current_dir, PATH_MAX,
+                         "%s/"GF_CHANGELOG_CURRENT_DIR"/",
+                         gfc->gfc_working_dir);
+        ret = mkdir_p (gfc->gfc_current_dir, 0600, _gf_false);
+        if (ret)
+                goto out;
+
+        (void) snprintf (gfc->gfc_processed_dir, PATH_MAX,
+                         "%s/"GF_CHANGELOG_PROCESSED_DIR"/",
+                         gfc->gfc_working_dir);
+        ret = mkdir_p (gfc->gfc_processed_dir, 0600, _gf_false);
+        if (ret)
+                goto out;
+
+        (void) snprintf (gfc->gfc_processing_dir, PATH_MAX,
+                         "%s/"GF_CHANGELOG_PROCESSING_DIR"/",
+                         gfc->gfc_working_dir);
+        ret = mkdir_p (gfc->gfc_processing_dir, 0600, _gf_false);
+        if (ret)
+                goto out;
+
+        dir = opendir (gfc->gfc_processing_dir);
+        if (!dir) {
+                gf_log ("", GF_LOG_ERROR,
+                        "opendir() error [reason: %s]", strerror (errno));
+                /* 'ret' still holds mkdir_p()'s 0 at this point */
+                ret = -1;
+                goto out;
+        }
+
+        gfc->gfc_dir = dir;
+
+        (void) snprintf (tracker_path, PATH_MAX,
+                         "%s/"GF_CHANGELOG_TRACKER, gfc->gfc_working_dir);
+
+        tracker_fd = open (tracker_path, O_CREAT | O_APPEND | O_RDWR,
+                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (tracker_fd < 0) {
+                closedir (gfc->gfc_dir);
+                /* forget the closed stream so that cleanup does not
+                   close it a second time */
+                gfc->gfc_dir = NULL;
+                ret = -1;
+                goto out;
+        }
+
+        gfc->gfc_fd = tracker_fd;
+        ret = 0;
+ out:
+        return ret;
+}
+
+/**
+ * (Re)connect to the changelog translator's notification socket.
+ *
+ * An already open gfc->gfc_sockfd is closed first. Up to
+ * gfc->gfc_connretries connection attempts are made, sleeping two
+ * seconds after each failed one. On success the connected descriptor
+ * is stored in gfc->gfc_sockfd.
+ *
+ * @this is ignored; gfc->this is used instead.
+ *
+ * Returns 0 on success, -1 on failure. On failure no descriptor is
+ * leaked and gfc->gfc_sockfd is -1 if an earlier connection had been
+ * closed.
+ */
+int
+gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc)
+{
+        int                ret         = 0;
+        int                len         = 0;
+        int                tries       = 0;
+        int                sockfd      = 0;
+        int                saved_errno = 0;
+        struct sockaddr_un remote;
+
+        this = gfc->this;
+
+        if (gfc->gfc_sockfd != -1) {
+                gf_log (this->name, GF_LOG_INFO,
+                        "Reconnecting...");
+                close (gfc->gfc_sockfd);
+                /* do not leave a stale descriptor behind: cleanup
+                   would otherwise close it again */
+                gfc->gfc_sockfd = -1;
+        }
+
+        sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
+        if (sockfd < 0) {
+                ret = -1;
+                goto out;
+        }
+
+        CHANGELOG_MAKE_SOCKET_PATH (gfc->gfc_brickpath,
+                                    gfc->gfc_sockpath, PATH_MAX);
+        gf_log (this->name, GF_LOG_INFO,
+                "connecting to changelog socket: %s (brick: %s)",
+                gfc->gfc_sockpath, gfc->gfc_brickpath);
+
+        /* sun_path is much smaller than PATH_MAX: refuse to overflow */
+        if (strlen (gfc->gfc_sockpath) >= sizeof (remote.sun_path)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "changelog socket path too long: %s",
+                        gfc->gfc_sockpath);
+                close (sockfd);
+                errno = ENAMETOOLONG;
+                ret = -1;
+                goto out;
+        }
+
+        memset (&remote, 0, sizeof (remote));
+        remote.sun_family = AF_UNIX;
+        strcpy (remote.sun_path, gfc->gfc_sockpath);
+
+        len = strlen (remote.sun_path) + sizeof (remote.sun_family);
+
+        while (tries < gfc->gfc_connretries) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "connection attempt %d/%d...",
+                        tries + 1, gfc->gfc_connretries);
+
+                /* initiate a connect */
+                if (connect (sockfd, (struct sockaddr *) &remote, len) == 0) {
+                        gfc->gfc_sockfd = sockfd;
+                        break;
+                }
+
+                tries++;
+                sleep (2);
+        }
+
+        if (tries == gfc->gfc_connretries) {
+                saved_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not connect to changelog socket!"
+                        " bailing out...");
+                /* the unconnected socket is of no further use */
+                close (sockfd);
+                errno = saved_errno;
+                ret = -1;
+        } else
+                gf_log (this->name, GF_LOG_INFO,
+                        "connection successful");
+
+ out:
+        return ret;
+}
+
+/**
+ * Mark the changelog @file as processed by renaming it into the
+ * processed directory.
+ *
+ * The resolved path of @file must begin with the registered working
+ * directory. Returns 0 on success, -1 otherwise; errno is preset to
+ * EINVAL before any check is made.
+ */
+int
+gf_changelog_done (char *file)
+{
+        int             ret      = -1;
+        char           *resolved = NULL;
+        xlator_t       *this     = NULL;
+        gf_changelog_t *gfc      = NULL;
+        char            to_path[PATH_MAX] = {0,};
+
+        errno = EINVAL;
+
+        this = THIS;
+        gfc = this ? (gf_changelog_t *) this->private : NULL;
+        if (!gfc || !file || (*file == '\0'))
+                goto out;
+
+        /* make sure 'file' is inside ->gfc_working_dir */
+        resolved = realpath (file, NULL);
+        if (!resolved)
+                goto out;
+
+        if (strncmp (gfc->gfc_working_dir, resolved,
+                     strlen (gfc->gfc_working_dir)) != 0)
+                goto out;
+
+        (void) snprintf (to_path, PATH_MAX, "%s%s",
+                         gfc->gfc_processed_dir, basename (resolved));
+        gf_log (this->name, GF_LOG_DEBUG,
+                "moving %s to processed directory", file);
+
+        ret = rename (resolved, to_path);
+        if (ret != 0)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "cannot move %s to %s (reason: %s)",
+                        file, to_path, strerror (errno));
+
+ out:
+        /* allocated by realpath(); free (NULL) is a no-op */
+        free (resolved);
+        return ret;
+}
+
+/**
+ * @API
+ * for a set of changelogs, start from the beginning: the tracker file
+ * is truncated to zero length.
+ *
+ * Returns 0 on success, -1 on failure. errno is set to EINVAL before
+ * the consumer state is looked up.
+ */
+int
+gf_changelog_start_fresh ()
+{
+        xlator_t       *this = THIS;
+        gf_changelog_t *gfc  = NULL;
+
+        if (this == NULL)
+                return -1;
+
+        errno = EINVAL;
+
+        gfc = (gf_changelog_t *) this->private;
+        if (gfc == NULL)
+                return -1;
+
+        return gf_ftruncate (gfc->gfc_fd, 0) ? -1 : 0;
+}
+
+/**
+ * @API
+ * return the next changelog file entry. zero means all changelogs
+ * consumed.
+ *
+ * @bufptr: caller buffer that receives the NUL-terminated entry (the
+ *          last byte read from the tracker line is replaced by the
+ *          terminator).
+ * @maxlen: size of @bufptr; at most PATH_MAX bytes are read per entry.
+ *
+ * Returns the number of bytes read for the entry, 0 when the tracker
+ * file has no further entry, -1 on error (errno is preset to EINVAL).
+ */
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen)
+{
+        ssize_t         size       = 0;
+        int             tracker_fd = 0;
+        xlator_t       *this       = NULL;
+        gf_changelog_t *gfc        = NULL;
+        char            buffer[PATH_MAX] = {0,};
+
+        errno = EINVAL;
+
+        this = THIS;
+        if (!this)
+                goto out;
+
+        gfc = (gf_changelog_t *) this->private;
+        if (!gfc)
+                goto out;
+
+        if (!bufptr || !maxlen)
+                goto out;
+
+        /* never read more than the local staging buffer can hold */
+        if (maxlen > sizeof (buffer))
+                maxlen = sizeof (buffer);
+
+        tracker_fd = gfc->gfc_fd;
+
+        size = gf_readline (tracker_fd, buffer, maxlen);
+        if (size < 0)
+                goto out;
+        if (size == 0)
+                return 0;
+
+        memcpy (bufptr, buffer, size - 1);
+        /* terminate the caller's copy, not the local staging buffer */
+        bufptr[size - 1] = '\0';
+
+        return size;
+
+ out:
+        return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_scan() - scan and generate a list of change entries
+ *
+ * The tracker file is truncated and then rewritten with one line per
+ * entry found in the processing directory (directory path + entry
+ * name). Calling this api multiple times (without calling
+ * gf_changelog_done()) therefore refreshes the list of changelogs in
+ * the tracker file.
+ * This call also acts as a cancellation point for the consumer.
+ *
+ * Returns the number of entries written, or -1 on error. errno is
+ * ECONNREFUSED when the processing thread has said goodbye, and the
+ * readdir_r() error number when reading the directory fails.
+ */
+ssize_t
+gf_changelog_scan ()
+{
+        int             ret        = 0;
+        int             tracker_fd = 0;
+        long            name_max   = 0;
+        size_t          len        = 0;
+        size_t          off        = 0;
+        xlator_t       *this       = NULL;
+        size_t          nr_entries = 0;
+        gf_changelog_t *gfc        = NULL;
+        struct dirent  *entryp     = NULL;
+        struct dirent  *result     = NULL;
+        char            buffer[PATH_MAX] = {0,};
+
+        this = THIS;
+        if (!this)
+                goto out;
+
+        gfc = (gf_changelog_t *) this->private;
+        if (!gfc)
+                goto out;
+
+        /**
+         * do we need to protect 'byebye' with locks? worst, the
+         * consumer would get notified during next scan().
+         */
+        if (byebye) {
+                errno = ECONNREFUSED;
+                goto out;
+        }
+
+        errno = EINVAL;
+
+        tracker_fd = gfc->gfc_fd;
+
+        if (gf_ftruncate (tracker_fd, 0))
+                goto out;
+
+        /* pathconf() returns -1 on error or when there is no limit;
+           adding that to the size would undersize the dirent buffer */
+        name_max = pathconf (gfc->gfc_processing_dir, _PC_NAME_MAX);
+        if (name_max == -1) {
+                name_max = 255; /* take a guess */
+                errno = EINVAL;
+        }
+
+        len = offsetof(struct dirent, d_name) + name_max + 1;
+        entryp = GF_CALLOC (1, len,
+                            gf_changelog_mt_libgfchangelog_dirent_t);
+        if (!entryp)
+                goto out;
+
+        rewinddir (gfc->gfc_dir);
+        while (1) {
+                ret = readdir_r (gfc->gfc_dir, entryp, &result);
+                if (ret || !result)
+                        break;
+
+                if ( !strcmp (basename (entryp->d_name), ".")
+                     || !strcmp (basename (entryp->d_name), "..") )
+                        continue;
+
+                nr_entries++;
+
+                GF_CHANGELOG_FILL_BUFFER (gfc->gfc_processing_dir,
+                                          buffer, off,
+                                          strlen (gfc->gfc_processing_dir));
+                GF_CHANGELOG_FILL_BUFFER (entryp->d_name, buffer,
+                                          off, strlen (entryp->d_name));
+                GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1);
+
+                if (gf_changelog_write (tracker_fd, buffer, off) != off) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "error writing changelog filename"
+                                " to tracker file");
+                        break;
+                }
+                off = 0;
+        }
+
+        GF_FREE (entryp);
+
+        /* 'ret' is only ever assigned by readdir_r(): non-zero means the
+           directory could not be read, which is not a completed scan */
+        if (ret) {
+                errno = ret;
+                goto out;
+        }
+
+        /* result == NULL: the whole directory was walked; a non-NULL
+           result means the loop was left on a tracker write failure */
+        if (!result) {
+                if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1)
+                        return nr_entries;
+        }
+ out:
+        return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_register() - register a client for updates.
+ *
+ * @brick_path:     brick whose changelog socket is connected to.
+ * @scratch_dir:    existing directory used as working directory.
+ * @log_file:       log file handed to gf_log_init().
+ * @log_level:      log level; -1 selects GF_LOG_INFO.
+ * @max_reconnects: connection attempts; values <= 0 mean one attempt.
+ *
+ * Sets up the working directories, connects to the changelog
+ * notification socket and starts the processing thread. On success
+ * the consumer state is stored in THIS->private.
+ *
+ * Returns 0 on success. On failure a non-zero value is returned, all
+ * partially acquired resources are released and errno carries the
+ * error recorded at the failing step.
+ */
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+                       char *log_file, int log_level, int max_reconnects)
+{
+        int             i    = 0;
+        int             ret  = -1;
+        int             errn = 0;
+        xlator_t       *this = NULL;
+        gf_changelog_t *gfc  = NULL;
+
+        this = THIS;
+        if (!this->ctx)
+                goto out;
+
+        if (!brick_path || !scratch_dir) {
+                errno = EINVAL;
+                goto out;
+        }
+
+        errno = ENOMEM;
+
+        gfc = GF_CALLOC (1, sizeof (*gfc),
+                         gf_changelog_mt_libgfchangelog_t);
+        if (!gfc)
+                goto out;
+
+        gfc->this = this;
+
+        gfc->gfc_dir = NULL;
+        gfc->gfc_fd = gfc->gfc_sockfd = -1;
+
+        gfc->gfc_working_dir = realpath (scratch_dir, NULL);
+        if (!gfc->gfc_working_dir) {
+                errn = errno;
+                goto cleanup;
+        }
+
+        ret = gf_changelog_open_dirs (gfc);
+        if (ret) {
+                errn = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not create entries in scratch dir");
+                goto cleanup;
+        }
+
+        /* passing ident as NULL means to use default ident for syslog */
+        if (gf_log_init (this->ctx, log_file, NULL)) {
+                /* 'ret' holds 0 from the step above: report failure */
+                errn = errno;
+                ret = -1;
+                goto cleanup;
+        }
+
+        gf_log_set_loglevel ((log_level == -1) ? GF_LOG_INFO :
+                             log_level);
+
+        gfc->gfc_connretries = (max_reconnects <= 0) ? 1 : max_reconnects;
+        /* gfc is zero-filled, so copying one byte less than the bound
+           used so far keeps the path NUL-terminated.
+           NOTE(review): assumes gfc_brickpath holds PATH_MAX bytes, as
+           the original bound implies -- confirm */
+        (void) strncpy (gfc->gfc_brickpath, brick_path, PATH_MAX - 1);
+
+        /* fill the encoding table before the processing thread, which
+           is handed gfc, can start */
+        for (; i < 256; i++) {
+                gfc->rfc3986[i] =
+                        (isalnum(i) || i == '~' ||
+                         i == '-' || i == '.' || i == '_') ? i : 0;
+        }
+
+        ret = gf_changelog_notification_init (this, gfc);
+        if (ret) {
+                errn = errno;
+                goto cleanup;
+        }
+
+        ret = gf_thread_create (&gfc->gfc_changelog_processor,
+                                NULL, gf_changelog_process, gfc);
+        if (ret) {
+                errn = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error creating changelog processor thread"
+                        " new changes won't be recorded!!!");
+                goto cleanup;
+        }
+
+        ret = 0;
+        this->private = gfc;
+
+        goto out;
+
+ cleanup:
+        gf_changelog_cleanup (gfc);
+        GF_FREE (gfc);
+        this->private = NULL;
+        errno = errn;
+
+ out:
+        return ret;
+}
diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am
new file mode 100644
index 000000000..e85031ad4
--- /dev/null
+++ b/xlators/features/changelog/src/Makefile.am
@@ -0,0 +1,19 @@
+# libtool library built from this directory: the changelog translator
+xlator_LTLIBRARIES = changelog.la
+
+# directory associated with the xlator_ prefix used above
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# headers used for the build only (not installed)
+noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \
+ changelog-misc.h changelog-encoders.h changelog-notifier.h
+
+changelog_la_LDFLAGS = -module -avoidversion
+
+changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \
+ changelog-encoders.c changelog-notifier.c
+# the translator links against libglusterfs from the build tree
+changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 \
+ -D_GNU_SOURCE -D$(GF_HOST_OS) -shared -nostartfiles -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c
new file mode 100644
index 000000000..553eec85c
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.c
@@ -0,0 +1,176 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "changelog-encoders.h"
+
+/**
+ * Serialize an entry record (struct changelog_entry_fields, passed as
+ * @data) into @buffer as "<gfid>/<basename>".
+ *
+ * With @encode set the GFID is written in its textual form, otherwise
+ * as the raw uuid_t bytes. Returns the number of bytes written.
+ */
+size_t
+entry_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+        size_t                         written  = 0;
+        char                          *gfid_str = NULL;
+        struct changelog_entry_fields *entry    = data;
+
+        if (!encode) {
+                CHANGELOG_FILL_BUFFER (buffer, written,
+                                       entry->cef_uuid, sizeof (uuid_t));
+        } else {
+                gfid_str = uuid_utoa (entry->cef_uuid);
+                CHANGELOG_FILL_BUFFER (buffer, written,
+                                       gfid_str, strlen (gfid_str));
+        }
+
+        /* separator between the GFID and the basename */
+        CHANGELOG_FILL_BUFFER (buffer, written, "/", 1);
+        CHANGELOG_FILL_BUFFER (buffer, written,
+                               entry->cef_bname, strlen (entry->cef_bname));
+
+        return written;
+}
+
+/**
+ * Serialize the fop number that @data points to into @buffer: as a
+ * decimal string when @encode is set, as the raw glusterfs_fop_t value
+ * otherwise. Returns the number of bytes written.
+ */
+size_t
+fop_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+        size_t          written  = 0;
+        char            text[10] = {0,};
+        glusterfs_fop_t fop      = *(glusterfs_fop_t *) data;
+
+        if (!encode) {
+                CHANGELOG_FILL_BUFFER (buffer, written, &fop, sizeof (fop));
+                return written;
+        }
+
+        (void) snprintf (text, sizeof (text), "%d", fop);
+        CHANGELOG_FILL_BUFFER (buffer, written, text, strlen (text));
+
+        return written;
+}
+
+/**
+ * Release the basename held by the entry record of the changelog_opt_t
+ * passed as @data. A NULL @data is ignored.
+ */
+void
+entry_free_fn (void *data)
+{
+        changelog_opt_t *opt = data;
+
+        if (opt != NULL) {
+                GF_FREE (opt->co_entry.cef_bname);
+        }
+}
+
+/**
+ * try to write all data in one shot
+ *
+ * Appends the cld->cld_xtra_records extra records (an array of
+ * changelog_opt_t at cld->cld_ptr) to @buffer starting at *@off. Each
+ * record is preceded by a single NUL byte. *@off is updated to the new
+ * end of data. @encode is passed through to the per-record converter.
+ */
+
+static inline void
+changelog_encode_write_xtra (changelog_log_data_t *cld,
+ char *buffer, size_t *off, gf_boolean_t encode)
+{
+ int i = 0;
+ size_t offset = 0;
+ void *data = NULL;
+ changelog_opt_t *co = NULL;
+
+ offset = *off;
+
+ co = (changelog_opt_t *) cld->cld_ptr;
+
+ for (; i < cld->cld_xtra_records; i++, co++) {
+ /* record separator */
+ CHANGELOG_FILL_BUFFER (buffer, offset, "\0", 1);
+
+ /* NOTE(review): no default case -- for any other co_type 'data'
+ keeps its previous value (NULL on the first record) */
+ switch (co->co_type) {
+ case CHANGELOG_OPT_REC_FOP:
+ data = &co->co_fop;
+ break;
+ case CHANGELOG_OPT_REC_ENTRY:
+ data = &co->co_entry;
+ break;
+ }
+
+ /* the converter writes at buffer + offset and returns the number
+ of bytes it produced */
+ if (co->co_convert)
+ offset += co->co_convert (data,
+ buffer + offset, encode);
+ else /* no coversion: write it out as it is */
+ CHANGELOG_FILL_BUFFER (buffer, offset,
+ data, co->co_len);
+ }
+
+ *off = offset;
+}
+
+/**
+ * Encode one changelog record in ASCII form and write it out.
+ *
+ * The record is assembled on the stack: type marker and textual GFID,
+ * then any extra records, then a terminating NUL byte. Returns the
+ * result of changelog_write_change().
+ */
+int
+changelog_encode_ascii (xlator_t *this, changelog_log_data_t *cld)
+{
+        changelog_priv_t *priv      = this->private;
+        char             *uuid_text = uuid_utoa (cld->cld_gfid);
+        size_t            uuid_len  = strlen (uuid_text);
+        size_t            off       = 0;
+        /* the name 'buffer' is relied upon by CHANGELOG_STORE_ASCII */
+        char             *buffer    = NULL;
+
+        /* extra bytes for decorations */
+        buffer = alloca (uuid_len + cld->cld_ptr_len + 10);
+        CHANGELOG_STORE_ASCII (priv, buffer,
+                               off, uuid_text, uuid_len, cld);
+
+        if (cld->cld_xtra_records != 0)
+                changelog_encode_write_xtra (cld, buffer, &off, _gf_true);
+
+        /* record terminator */
+        CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+        return changelog_write_change (priv, buffer, off);
+}
+
+/**
+ * Encode one changelog record in binary form and write it out.
+ *
+ * The record is assembled on the stack: type marker and raw GFID, then
+ * any extra records, then a terminating NUL byte. Returns the result
+ * of changelog_write_change().
+ */
+int
+changelog_encode_binary (xlator_t *this, changelog_log_data_t *cld)
+{
+        changelog_priv_t *priv   = this->private;
+        size_t            off    = 0;
+        /* the name 'buffer' is relied upon by CHANGELOG_STORE_BINARY */
+        char             *buffer = NULL;
+
+        /* extra bytes for decorations */
+        buffer = alloca (sizeof (uuid_t) + cld->cld_ptr_len + 10);
+        CHANGELOG_STORE_BINARY (priv, buffer, off, cld->cld_gfid, cld);
+
+        if (cld->cld_xtra_records != 0)
+                changelog_encode_write_xtra (cld, buffer, &off, _gf_false);
+
+        /* record terminator */
+        CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+        return changelog_write_change (priv, buffer, off);
+}
+
+/* encoder table, indexed by encoding mode: each slot names its
+ * encoding and the routine that encodes a record in that format */
+static struct changelog_encoder
+cb_encoder[] = {
+ [CHANGELOG_ENCODE_BINARY] =
+ {
+ .encoder = CHANGELOG_ENCODE_BINARY,
+ .encode = changelog_encode_binary,
+ },
+ [CHANGELOG_ENCODE_ASCII] =
+ {
+ .encoder = CHANGELOG_ENCODE_ASCII,
+ .encode = changelog_encode_ascii,
+ },
+};
+
+/**
+ * Point priv->ce at the encoder table slot selected by
+ * priv->encode_mode. The mode is used as an index without a range
+ * check.
+ */
+void
+changelog_encode_change( changelog_priv_t * priv)
+{
+        priv->ce = cb_encoder + priv->encode_mode;
+}
diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h
new file mode 100644
index 000000000..a3efbee05
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_ENCODERS_H
+#define _CHANGELOG_ENCODERS_H
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "changelog-helpers.h"
+
+/**
+ * Append the one-byte record type marker of @cld (taken from
+ * priv->maps) followed by @gfid_len bytes of the textual GFID to @buf
+ * at offset @off. @off is passed on to CHANGELOG_FILL_BUFFER, which
+ * callers rely on to advance it.
+ */
+#define CHANGELOG_STORE_ASCII(priv, buf, off, gfid, gfid_len, cld) do { \
+                CHANGELOG_FILL_BUFFER (buf, off,                        \
+                                       (priv)->maps[(cld)->cld_type], 1); \
+                CHANGELOG_FILL_BUFFER (buf,                             \
+                                       off, gfid, gfid_len);            \
+        } while (0)
+
+/**
+ * Binary counterpart of CHANGELOG_STORE_ASCII: the type marker is
+ * followed by the raw GFID (sizeof (uuid_t) bytes).
+ */
+#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) do {          \
+                CHANGELOG_FILL_BUFFER (buf, off,                        \
+                                       (priv)->maps[(cld)->cld_type], 1); \
+                CHANGELOG_FILL_BUFFER (buf,                             \
+                                       off, gfid, sizeof (uuid_t));     \
+        } while (0)
+
+/* serialize an entry record (GFID "/" basename) into buffer; returns
+ * the number of bytes written */
+size_t
+entry_fn (void *data, char *buffer, gf_boolean_t encode);
+/* serialize a fop number into buffer; returns the number of bytes
+ * written */
+size_t
+fop_fn (void *data, char *buffer, gf_boolean_t encode);
+/* free the basename held by an entry record */
+void
+entry_free_fn (void *data);
+/* encode a record in binary form and write it to the changelog */
+int
+changelog_encode_binary (xlator_t *, changelog_log_data_t *);
+/* encode a record in ASCII form and write it to the changelog */
+int
+changelog_encode_ascii (xlator_t *, changelog_log_data_t *);
+/* select the encoder matching the configured encode mode */
+void
+changelog_encode_change(changelog_priv_t *);
+
+#endif /* _CHANGELOG_ENCODERS_H */
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
new file mode 100644
index 000000000..7ab0091b5
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -0,0 +1,693 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+#include "iobuf.h"
+
+#include "changelog-helpers.h"
+#include "changelog-mem-types.h"
+
+#include "changelog-encoders.h"
+#include <pthread.h>
+
+/**
+ * Cancel the thread @thr_id and wait for it to terminate.
+ *
+ * Failures are only logged. pthread_cancel() and pthread_join() report
+ * errors through their return value rather than errno, so the return
+ * value is what gets passed to strerror().
+ */
+void
+changelog_thread_cleanup (xlator_t *this, pthread_t thr_id)
+{
+        int   ret    = 0;
+        void *retval = NULL;
+
+        /* send a cancel request to the thread */
+        ret = pthread_cancel (thr_id);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not cancel thread (reason: %s)",
+                        strerror (ret));
+                goto out;
+        }
+
+        ret = pthread_join (thr_id, &retval);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "cancel request not adhered as expected"
+                        " (reason: %s)", strerror (ret));
+        } else if (retval != PTHREAD_CANCELED) {
+                /* joined fine, but the thread was not cancelled */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "cancel request not adhered as expected"
+                        " (reason: thread exited on its own)");
+        }
+
+ out:
+        return;
+}
+
+/**
+ * Return the data pointer of the iobuf attached to @local's log data,
+ * or NULL when no iobuf is attached.
+ */
+inline void *
+changelog_get_usable_buffer (changelog_local_t *local)
+{
+        struct iobuf *iobuf = local->cld.cld_iobuf;
+
+        return iobuf ? iobuf->ptr : NULL;
+}
+
+/**
+ * Record in @local's log data the length @len and the number of extra
+ * records @xr.
+ */
+inline void
+changelog_set_usable_record_and_length (changelog_local_t *local,
+                                        size_t len, int xr)
+{
+        local->cld.cld_ptr_len      = len;
+        local->cld.cld_xtra_records = xr;
+}
+
+/**
+ * Destroy a changelog_local_t: run the free callback of every extra
+ * record that has one, drop the iobuf and inode references and return
+ * @local to its memory pool. A NULL @local is ignored.
+ */
+void
+changelog_local_cleanup (xlator_t *xl, changelog_local_t *local)
+{
+        int                   idx  = 0;
+        changelog_opt_t      *opts = NULL;
+        changelog_log_data_t *cld  = NULL;
+
+        if (local == NULL)
+                return;
+
+        cld  = &local->cld;
+        opts = (changelog_opt_t *) cld->cld_ptr;
+
+        /* cleanup dynamic allocation for extra records */
+        for (idx = 0; idx < cld->cld_xtra_records; idx++) {
+                if (opts[idx].co_free)
+                        opts[idx].co_free (&opts[idx]);
+        }
+
+        CHANGELOG_IOBUF_UNREF (cld->cld_iobuf);
+
+        if (local->inode != NULL)
+                inode_unref (local->inode);
+
+        mem_put (local);
+}
+
+/**
+ * Write all @len bytes of @buffer to @fd, continuing after short
+ * writes and retrying writes interrupted by a signal (EINTR).
+ *
+ * Returns 0 when everything was written, 1 otherwise.
+ *
+ * Not declared 'inline': a bare inline definition provides no external
+ * definition under C99 and later, yet this routine is called from
+ * other files.
+ */
+int
+changelog_write (int fd, char *buffer, size_t len)
+{
+        ssize_t size   = 0;
+        size_t  writen = 0;
+
+        while (writen < len) {
+                size = write (fd,
+                              buffer + writen, len - writen);
+                if ((size < 0) && (errno == EINTR))
+                        continue; /* interrupted before writing: retry */
+                if (size <= 0)
+                        break;
+
+                writen += size;
+        }
+
+        return (writen != len);
+}
+
+/**
+ * Close the active changelog (if open) and rename it to carry the
+ * timestamp @ts as suffix. After a successful rename the new basename,
+ * including its terminating NUL, is written to priv->wfd.
+ *
+ * Returns 0 when the rename succeeded and the name was handed over, or
+ * when there was no changelog file to rename (ENOENT); non-zero
+ * otherwise.
+ */
+static int
+changelog_rollover_changelog (xlator_t *this,
+                              changelog_priv_t *priv, unsigned long ts)
+{
+        int   ret   = -1;
+        char *bname = NULL;
+        char  ofile[PATH_MAX] = {0,};
+        char  nfile[PATH_MAX] = {0,};
+
+        if (priv->changelog_fd != -1) {
+                close (priv->changelog_fd);
+                priv->changelog_fd = -1;
+        }
+
+        (void) snprintf (ofile, PATH_MAX,
+                         "%s/"CHANGELOG_FILE_NAME, priv->changelog_dir);
+        (void) snprintf (nfile, PATH_MAX,
+                         "%s/"CHANGELOG_FILE_NAME".%lu",
+                         priv->changelog_dir, ts);
+
+        ret = rename (ofile, nfile);
+        if (ret == 0) {
+                /* renamed: announce the new file */
+                bname = basename (nfile);
+                gf_log (this->name, GF_LOG_DEBUG, "notifying: %s", bname);
+                ret = changelog_write (priv->wfd, bname, strlen (bname) + 1);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to send file name to notify thread"
+                                " (reason: %s)", strerror (errno));
+                }
+                return ret;
+        }
+
+        /* nothing to roll over is not an error */
+        if (errno == ENOENT)
+                return 0;
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "error renaming %s -> %s (reason %s)",
+                ofile, nfile, strerror (errno));
+
+        return ret;
+}
+
+/**
+ * Open (creating it if needed) the active changelog file in
+ * priv->changelog_dir and write the changelog header to it. O_SYNC is
+ * requested when priv->fsync_interval is 0.
+ *
+ * On success priv->changelog_fd holds the descriptor and 0 is
+ * returned. If the open fails -1 is returned; if the header cannot be
+ * written the descriptor is closed, priv->changelog_fd is reset to -1
+ * and the write result is returned.
+ */
+int
+changelog_open (xlator_t *this,
+                changelog_priv_t *priv)
+{
+        int  fd    = 0;
+        int  ret   = -1;
+        int  flags = O_CREAT | O_RDWR;
+        char buffer[1024] = {0,};
+        char changelog_path[PATH_MAX] = {0,};
+
+        (void) snprintf (changelog_path, PATH_MAX,
+                         "%s/"CHANGELOG_FILE_NAME,
+                         priv->changelog_dir);
+
+        if (priv->fsync_interval == 0)
+                flags |= O_SYNC;
+
+        fd = open (changelog_path, flags,
+                   S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (fd < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unable to open/create changelog file %s"
+                        " (reason: %s). change-logging will be"
+                        " inactive", changelog_path, strerror (errno));
+                return ret;
+        }
+
+        priv->changelog_fd = fd;
+
+        (void) snprintf (buffer, 1024, CHANGELOG_HEADER,
+                         CHANGELOG_VERSION_MAJOR,
+                         CHANGELOG_VERSION_MINOR,
+                         priv->ce->encoder);
+
+        ret = changelog_write_change (priv, buffer, strlen (buffer));
+        if (ret) {
+                /* a changelog without its header is of no use */
+                close (priv->changelog_fd);
+                priv->changelog_fd = -1;
+        }
+
+        return ret;
+}
+
+/**
+ * Roll the active changelog over using timestamp @ts and, unless
+ * @finale is set, open a fresh changelog.
+ *
+ * Returns the rollover result if it failed or if @finale is set,
+ * otherwise the result of changelog_open().
+ */
+int
+changelog_start_next_change (xlator_t *this,
+                             changelog_priv_t *priv,
+                             unsigned long ts, gf_boolean_t finale)
+{
+        int ret = changelog_rollover_changelog (this, priv, ts);
+
+        if (ret || finale)
+                return ret;
+
+        return changelog_open (this, priv);
+}
+
+/**
+ * return the length of entry, i.e. the size of one
+ * changelog_log_data_t
+ */
+inline size_t
+changelog_entry_length ()
+{
+ return sizeof (changelog_log_data_t);
+}
+
+/**
+ * Turn @cld into a rollover event stamped with the current time in
+ * seconds; @is_last is stored as the event's 'finale' flag.
+ *
+ * Returns 0 on success, -1 if the current time cannot be obtained (the
+ * record type has already been set by then).
+ */
+int
+changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last)
+{
+        struct timeval now = {0,};
+
+        cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
+
+        if (gettimeofday (&now, NULL) != 0)
+                return -1;
+
+        cld->cld_finale    = is_last;
+        cld->cld_roll_time = (unsigned long) now.tv_sec;
+
+        return 0;
+}
+
+/**
+ * Write @len bytes of @buffer to the active changelog file
+ * (priv->changelog_fd). Returns the result of changelog_write().
+ */
+int
+changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len)
+{
+ return changelog_write (priv->changelog_fd, buffer, len);
+}
+
+/**
+ * Act on one changelog event:
+ *  - rollover events re-select the encoder and start the next
+ *    changelog;
+ *  - fsync events flush the active changelog to disk;
+ *  - anything else is encoded and written by the active encoder.
+ *
+ * Apart from rollover, events are dropped (returning 0) while no
+ * changelog file is open. Returns the result of the action performed.
+ */
+inline int
+changelog_handle_change (xlator_t *this,
+                         changelog_priv_t *priv, changelog_log_data_t *cld)
+{
+        int ret = 0;
+
+        if (CHANGELOG_TYPE_IS_ROLLOVER (cld->cld_type)) {
+                changelog_encode_change(priv);
+                ret = changelog_start_next_change (this, priv,
+                                                   cld->cld_roll_time,
+                                                   cld->cld_finale);
+                if (ret)
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Problem rolling over changelog(s)");
+                return ret;
+        }
+
+        /**
+         * case when there is reconfigure done (disabling changelog) and there
+         * are still fops that have updates in progress.
+         */
+        if (priv->changelog_fd == -1)
+                return 0;
+
+        if (!CHANGELOG_TYPE_IS_FSYNC (cld->cld_type)) {
+                /* a regular record: hand it to the encoder */
+                ret = priv->ce->encode (this, cld);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "error writing changelog to disk");
+                }
+                return ret;
+        }
+
+        ret = fsync (priv->changelog_fd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fsync failed (reason: %s)",
+                        strerror (errno));
+        }
+
+        return ret;
+}
+
+/**
+ * Allocate and initialise the per-fop changelog local.
+ *
+ * @inode:        inode the fop works on; may be NULL only when
+ *                @update_flag is true. A reference is taken when it is
+ *                given.
+ * @gfid:         copied into the log data.
+ * @xtra_records: when non-zero, an iobuf sized for that many optional
+ *                records is attached.
+ * @update_flag:  stored as local->update_no_check.
+ *
+ * The caller sets the number of extra records actually used later on.
+ * Returns the new local, or NULL on failure.
+ */
+changelog_local_t *
+changelog_local_init (xlator_t *this, inode_t *inode,
+                      uuid_t gfid, int xtra_records,
+                      gf_boolean_t update_flag)
+{
+        struct iobuf      *xtra_buf = NULL;
+        changelog_local_t *local    = NULL;
+
+        /**
+         * We relax the presence of inode if @update_flag is true.
+         * The caller (implementation of the fop) needs to be careful to
+         * not blindly use local->inode.
+         */
+        if (!(update_flag || inode)) {
+                gf_log_callingfn (this->name, GF_LOG_WARNING,
+                                  "inode needed for version checking !!!");
+                return NULL;
+        }
+
+        if (xtra_records) {
+                xtra_buf = iobuf_get2 (this->ctx->iobuf_pool,
+                                       xtra_records * CHANGELOG_OPT_RECORD_LEN);
+                if (!xtra_buf)
+                        return NULL;
+        }
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                CHANGELOG_IOBUF_UNREF (xtra_buf);
+                return NULL;
+        }
+
+        uuid_copy (local->cld.cld_gfid, gfid);
+
+        local->update_no_check      = update_flag;
+        local->cld.cld_iobuf        = xtra_buf;
+        local->cld.cld_xtra_records = 0; /* set by the caller */
+
+        if (inode)
+                local->inode = inode_ref (inode);
+
+        return local;
+}
+
+/**
+ * Forget callback: detach this translator's context from @inode and
+ * free it if one was set. Always returns 0.
+ */
+int
+changelog_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t ctx_addr = 0;
+
+        inode_ctx_del (inode, this, &ctx_addr);
+
+        if (ctx_addr != 0) {
+                GF_FREE ((changelog_inode_ctx_t *) (long) ctx_addr);
+        }
+
+        return 0;
+}
+
+/**
+ * Hand the single event @cld to the configured dispatcher
+ * (priv->cd.dispatchfn), passing NULL as its last argument. Returns
+ * the dispatcher's result.
+ */
+int
+changelog_inject_single_event (xlator_t *this,
+ changelog_priv_t *priv,
+ changelog_log_data_t *cld)
+{
+ return priv->cd.dispatchfn (this, priv, priv->cd.cd_data, cld, NULL);
+}
+
+/**
+ * TODO: these threads have many things in common (wake up after
+ * a certain time etc..). move them into a separate routine.
+ */
+/**
+ * Thread routine: every priv->rollover_time seconds inject a rollover
+ * event and, if the injection succeeded, bump the slice version. Both
+ * steps happen under priv->lock. @data is the changelog_priv_t.
+ *
+ * The loop never terminates on its own.
+ */
+void *
+changelog_rollover (void *data)
+{
+        changelog_priv_t       *priv  = data;
+        xlator_t               *this  = priv->cr.this;
+        changelog_time_slice_t *slice = &priv->slice;
+        struct timeval          wait  = {0,};
+        changelog_log_data_t    cld   = {0,};
+
+        for (;;) {
+                wait.tv_sec  = priv->rollover_time;
+                wait.tv_usec = 0;
+
+                /* select() with no descriptors: 0 means timeout expired */
+                if (select (0, NULL, NULL, NULL, &wait) != 0)
+                        continue;
+
+                if (changelog_fill_rollover_data (&cld, _gf_false)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to fill rollover data");
+                        continue;
+                }
+
+                LOCK (&priv->lock);
+                {
+                        if (!changelog_inject_single_event (this, priv,
+                                                            &cld)) {
+                                SLICE_VERSION_UPDATE (slice);
+                        }
+                }
+                UNLOCK (&priv->lock);
+        }
+
+        return NULL;
+}
+
+/**
+ * Thread routine: every priv->fsync_interval seconds inject an fsync
+ * event into the dispatcher. @data is the changelog_priv_t.
+ *
+ * The loop never terminates on its own.
+ */
+void *
+changelog_fsync_thread (void *data)
+{
+        changelog_priv_t     *priv = data;
+        xlator_t             *this = priv->cf.this;
+        struct timeval        wait = {0,};
+        changelog_log_data_t  cld  = {0,};
+
+        cld.cld_type = CHANGELOG_TYPE_FSYNC;
+
+        for (;;) {
+                wait.tv_sec  = priv->fsync_interval;
+                wait.tv_usec = 0;
+
+                /* select() with no descriptors: 0 means timeout expired */
+                if (select (0, NULL, NULL, NULL, &wait) != 0)
+                        continue;
+
+                if (changelog_inject_single_event (this, priv, &cld))
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to inject fsync event");
+        }
+
+        return NULL;
+}
+
+/* macros for inode/changelog version checks */
+
+/* copy the slice's current version for 'type' into *iver; inode->lock
+ * is taken first, then priv->lock inside it */
+#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) do { \
+ LOCK (&inode->lock); \
+ { \
+ LOCK (&priv->lock); \
+ { \
+ *iver = slice->changelog_version[type]; \
+ } \
+ UNLOCK (&priv->lock); \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+/* under priv->lock, set 'upd' to _gf_true when 'ver' DIFFERS from the
+ * slice's version for 'type' (an update is needed) and to _gf_false
+ * when they are equal */
+#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) do { \
+ LOCK (&priv->lock); \
+ { \
+ upd = (ver == slice->changelog_version[type]) \
+ ? _gf_false : _gf_true; \
+ } \
+ UNLOCK (&priv->lock); \
+ } while (0)
+
+/**
+ * Store @ctx (as a 64-bit address) as this translator's context on
+ * @inode. Uses the double-underscore variant of the inode ctx API; the
+ * caller visible in this file holds inode->lock around it.
+ */
+static int
+__changelog_inode_ctx_set (xlator_t *this,
+ inode_t *inode, changelog_inode_ctx_t *ctx)
+{
+ uint64_t ctx_addr = (uint64_t) ctx;
+ return __inode_ctx_set (inode, this, &ctx_addr);
+}
+
+/**
+ * one shot routine to get the address and the value of an inode version
+ * for a particular type.
+ *
+ * Looks up this translator's context on @inode, allocating and
+ * attaching a zeroed one when none is set yet. When a context is
+ * available and both @iver and @version are given, *@iver receives the
+ * address of the version slot for @type and *@version its current
+ * value.
+ *
+ * Returns the context, or NULL if it could not be allocated or
+ * attached.
+ */
+static changelog_inode_ctx_t *
+__changelog_inode_ctx_get (xlator_t *this,
+                           inode_t *inode, unsigned long **iver,
+                           unsigned long *version, changelog_log_type type)
+{
+        uint64_t               ctx_addr = 0;
+        changelog_inode_ctx_t *ctx      = NULL;
+
+        /* a failed lookup is treated like "no context set" */
+        if (__inode_ctx_get (inode, this, &ctx_addr) < 0)
+                ctx_addr = 0;
+
+        if (ctx_addr != 0) {
+                ctx = (changelog_inode_ctx_t *) (long)ctx_addr;
+        } else {
+                ctx = GF_CALLOC (1, sizeof (*ctx),
+                                 gf_changelog_mt_inode_ctx_t);
+                if (ctx && __changelog_inode_ctx_set (this, inode, ctx)) {
+                        GF_FREE (ctx);
+                        ctx = NULL;
+                }
+        }
+
+        if (ctx && iver && version) {
+                *iver = CHANGELOG_INODE_VERSION_TYPE (ctx, type);
+                *version = **iver;
+        }
+
+        return ctx;
+}
+
+/**
+ * Locked wrapper: runs __changelog_inode_ctx_get() with inode->lock held
+ * and returns its result.
+ */
+static changelog_inode_ctx_t *
+changelog_inode_ctx_get (xlator_t *this,
+                         inode_t *inode, unsigned long **iver,
+                         unsigned long *version, changelog_log_type type)
+{
+        changelog_inode_ctx_t *found = NULL;
+
+        LOCK (&inode->lock);
+        found = __changelog_inode_ctx_get (this, inode, iver, version, type);
+        UNLOCK (&inode->lock);
+
+        return found;
+}
+
+/**
+ * This is the main update routine. Locking has been made granular so as to
+ * maximize parallelism of fops - I'll try to explain it below using execution
+ * timelines.
+ *
+ * Basically, the contention is between multiple execution threads of this
+ * routine and the roll-over thread. So, instead of having a big lock, we hold
+ * granular locks: inode->lock and priv->lock. Now I'll explain what happens
+ * when there is an update and a roll-over at just about the same time.
+ * NOTE:
+ * - the dispatcher itself synchronizes updates via its own lock
+ * - the slice version is incremented by the roll-over thread
+ *
+ * Case 1: When the rollover thread wins before the inode version can be
+ * compared with the slice version.
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 1, 1, 1> |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 1 <-> S: 2 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * |
+ * <if update == true> |
+ * <dispatch-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Therefore, the change gets recorded in the next change (no lost change). If
+ * the slice version was ahead of the inode version (say I:1, S: 2), then
+ * anyway the comparison would result in a update (I: 1, S: 3).
+ *
+ * If the rollover time is too short, then there is another contention when the
+ * updater tries to bring up the inode version to the slice version (this is
+ * also the case when the roll-over thread wakes up during
+ * INODE_VERSION_UPDATE).
+ *
+ * <CTX: 1, 1, 1> | <SLICE: 2, 2, 2>
+ * |
+ * |
+ * <dispatch-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 3, 3, 3>
+ * | UNLOCK (&priv->lock)
+ *
+ *
+ * Case 2: When the fop thread wins
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 0, 0, 0> |
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 0 <-> S: 1 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * <if update == true> |
+ * <dispatch-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 0, 0> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Here again, if the inode version was equal to the slice version (I: 1, S: 1)
+ * then there is no need to record an update (as the equality of the two version
+ * signifies an update was recorded in the current time slice).
+ */
+/**
+ * Record a change of @type for the fop described by @local.
+ *
+ * Unless local->update_no_check is set, the inode's stored version for
+ * @type is compared with the slice version; when they are equal nothing is
+ * dispatched. Otherwise local->cld (and the cld of local->prev_entry, when
+ * present) is handed to the dispatcher, and on dispatcher success the inode
+ * version is brought up to the slice version.
+ */
+inline void
+changelog_update (xlator_t *this, changelog_priv_t *priv,
+                  changelog_local_t *local, changelog_log_type type)
+{
+        unsigned long          *iver     = NULL;
+        unsigned long           version  = 0;
+        inode_t                *inode    = NULL;
+        changelog_time_slice_t *slice    = &priv->slice;
+        changelog_log_data_t   *primary  = NULL;
+        changelog_log_data_t   *extra    = NULL;
+        gf_boolean_t            need_upd = _gf_true;
+
+        /* version checking is skipped for fops that asked for it, and
+         * when no inode context could be obtained */
+        if (!local->update_no_check) {
+                inode = local->inode;
+
+                if (changelog_inode_ctx_get (this, inode,
+                                             &iver, &version, type)) {
+                        INODE_VERSION_EQUALS_SLICE (priv, version,
+                                                    slice, type, need_upd);
+                }
+        }
+
+        if (!need_upd)
+                return;
+
+        primary = &local->cld;
+        primary->cld_type = type;
+
+        if (local->prev_entry != NULL) {
+                extra = &local->prev_entry->cld;
+                extra->cld_type = type;
+        }
+
+        /* bump the inode version only after a successful dispatch */
+        if (priv->cd.dispatchfn (this, priv,
+                                 priv->cd.cd_data, primary, extra))
+                return;
+
+        if (!local->update_no_check && iver) {
+                INODE_VERSION_UPDATE (priv, inode, iver, slice, type);
+        }
+}
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
new file mode 100644
index 000000000..ad79636b0
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -0,0 +1,395 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_HELPERS_H
+#define _CHANGELOG_HELPERS_H
+
+#include "locking.h"
+#include "timer.h"
+#include "pthread.h"
+#include "iobuf.h"
+
+#include "changelog-misc.h"
+
+/**
+ * the changelog entry: one record handed to the dispatcher
+ */
+typedef struct changelog_log_data {
+ /* rollover related */
+ unsigned long cld_roll_time;
+
+ /* reopen changelog? */
+ gf_boolean_t cld_finale;
+
+ /* kind of record (data/metadata/entry or an internal type) */
+ changelog_log_type cld_type;
+
+ /**
+ * since gfid is _always_ a necessity, it's not a part
+ * of the iobuf. by doing this we do not add any overhead
+ * for data and metadata related fops.
+ */
+ uuid_t cld_gfid;
+
+ /**
+ * iobufs are used for optional records: pargfid, path,
+ * write offsets etc. It's the fop implementer's job
+ * to allocate (iobuf_get() in the fop) and get unref'ed
+ * in the callback (CHANGELOG_STACK_UNWIND).
+ */
+ struct iobuf *cld_iobuf;
+
+/* shorthand for the iobuf's data pointer */
+#define cld_ptr cld_iobuf->ptr
+
+ /**
+ * after allocation you can point this to the length of
+ * usable data, but make sure it does not exceed
+ * the size of the requested iobuf.
+ */
+ size_t cld_iobuf_len;
+
+/* alias of cld_iobuf_len, to pair with cld_ptr */
+#define cld_ptr_len cld_iobuf_len
+
+ /**
+ * number of optional records
+ */
+ int cld_xtra_records;
+} changelog_log_data_t;
+
+/**
+ * holder for dispatch function and private data
+ */
+
+typedef struct changelog_priv changelog_priv_t;
+
+typedef struct changelog_dispatcher {
+ /* dispatcher private data; passed back as the third dispatchfn argument */
+ void *cd_data;
+ /* called with up to two log records; the second one may be NULL */
+ int (*dispatchfn) (xlator_t *, changelog_priv_t *, void *,
+ changelog_log_data_t *, changelog_log_data_t *);
+} changelog_dispatcher_t;
+
+/* set-up (ctor) and tear-down (dtor) routines of a dispatcher for one mode */
+struct changelog_bootstrap {
+ changelog_mode_t mode;
+ int (*ctor) (xlator_t *, changelog_dispatcher_t *);
+ int (*dtor) (xlator_t *, changelog_dispatcher_t *);
+};
+
+/* pairs an encoder type with the routine that encodes a log record */
+struct changelog_encoder {
+ changelog_encoder_t encoder;
+ int (*encode) (xlator_t *, changelog_log_data_t *);
+};
+
+
+/* xlator private */
+
+typedef struct changelog_time_slice {
+ /**
+ * just in case we need sub-second granularity some day (note that
+ * this is a struct timeval, not a nanosecond-capable type).
+ * field is unused as of now (maybe we'd need it later).
+ */
+ struct timeval tv_start;
+
+ /**
+ * version of changelog file, incremented each time changes
+ * rollover. One counter per changelog type.
+ */
+ unsigned long changelog_version[CHANGELOG_MAX_TYPE];
+} changelog_time_slice_t;
+
+/* context of the rollover thread */
+typedef struct changelog_rollover {
+ /* rollover thread */
+ pthread_t rollover_th;
+
+ /* the changelog translator instance */
+ xlator_t *this;
+} changelog_rollover_t;
+
+/* context of the fsync thread */
+typedef struct changelog_fsync {
+ /* fsync() thread */
+ pthread_t fsync_th;
+
+ /* the changelog translator instance (read by changelog_fsync_thread) */
+ xlator_t *this;
+} changelog_fsync_t;
+
+/* maximum number of simultaneously connected notification clients */
+# define CHANGELOG_MAX_CLIENTS 5
+/* context of the notifier thread */
+typedef struct changelog_notify {
+ /* reader end of the pipe */
+ int rfd;
+
+ /* notifier thread */
+ pthread_t notify_th;
+
+ /* unique socket path */
+ char sockpath[PATH_MAX];
+
+ /* listening unix domain socket; -1 when not open */
+ int socket_fd;
+
+ /**
+ * simple array of accept()'ed fds (-1 marks a free slot). Not
+ * scalable at all for large number of clients, but it's okay as we
+ * have a hard limit in this version (@CHANGELOG_MAX_CLIENTS).
+ */
+ int client_fd[CHANGELOG_MAX_CLIENTS];
+
+ xlator_t *this;
+} changelog_notify_t;
+
+/* xlator private data of the changelog translator */
+struct changelog_priv {
+ /* fops are recorded only while this is set */
+ gf_boolean_t active;
+
+ /* to generate unique socket file per brick */
+ char *changelog_brick;
+
+ /* logging directory */
+ char *changelog_dir;
+
+ /* one file for all changelog types */
+ int changelog_fd;
+
+ /* guards the slice versions (see the INODE_VERSION_* macros) */
+ gf_lock_t lock;
+
+ /* write end of the pipe */
+ int wfd;
+
+ /* rollover time */
+ int32_t rollover_time;
+
+ /* fsync() interval (used as seconds by the fsync thread) */
+ int32_t fsync_interval;
+
+ /* changelog type maps */
+ const char *maps[CHANGELOG_MAX_TYPE];
+
+ /* time slicer */
+ changelog_time_slice_t slice;
+
+ /* context of the updater */
+ changelog_dispatcher_t cd;
+
+ /* context of the rollover thread */
+ changelog_rollover_t cr;
+
+ /* context of fsync thread */
+ changelog_fsync_t cf;
+
+ /* context of the notifier thread */
+ changelog_notify_t cn;
+
+ /* operation mode */
+ changelog_mode_t op_mode;
+
+ /* bootstrap routine for 'current' logger */
+ struct changelog_bootstrap *cb;
+
+ /* encoder mode */
+ changelog_encoder_t encode_mode;
+
+ /* encoder */
+ struct changelog_encoder *ce;
+};
+
+/* per-fop state kept in frame->local */
+struct changelog_local {
+ /* inode whose version is checked; unused when update_no_check is set */
+ inode_t *inode;
+ /* when set, changelog_update() skips inode version checking */
+ gf_boolean_t update_no_check;
+
+ /* the record to be logged for this fop */
+ changelog_log_data_t cld;
+
+ /**
+ * ->prev_entry is used in cases when there needs to be an
+ * additional changelog entry for the parent (eg. rename).
+ * It's analogous to ->next in the singly linked list world,
+ * but we call it ->prev_entry.
+ */
+ struct changelog_local *prev_entry;
+};
+
+typedef struct changelog_local changelog_local_t;
+
+/* inode version is stored in inode ctx, one counter per changelog type */
+typedef struct changelog_inode_ctx {
+ unsigned long iversion[CHANGELOG_MAX_TYPE];
+} changelog_inode_ctx_t;
+
+/* address of the version counter for @type inside inode context @ctx */
+#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) (&((ctx)->iversion[(type)]))
+
+/**
+ * Optional Records:
+ * fops that need to save additional information request an array of
+ * @changelog_opt_t structs. The array is allocated via @iobufs.
+ */
+typedef enum {
+ CHANGELOG_OPT_REC_FOP, /* record carries a fop number (co_fop) */
+ CHANGELOG_OPT_REC_ENTRY, /* record carries parent gfid + basename (co_entry) */
+} changelog_optional_rec_type_t;
+
+/* payload of a CHANGELOG_OPT_REC_ENTRY record */
+struct changelog_entry_fields {
+ /* filled from the parent gfid by CHANGELOG_FILL_ENTRY */
+ uuid_t cef_uuid;
+ /* gf_strdup()'ed copy of the basename (see CHANGELOG_FILL_ENTRY) */
+ char *cef_bname;
+};
+
+/* one optional record; which union member is valid is given by co_type */
+typedef struct {
+ /**
+ * @co_convert can be used to do post-processing of the record before
+ * it's persisted to the CHANGELOG. If this is NULL, then the record
+ * is persisted as per its in-memory format.
+ */
+ size_t (*co_convert) (void *data, char *buffer, gf_boolean_t encode);
+
+ /* release routines */
+ void (*co_free) (void *data);
+
+ /* type of the field */
+ changelog_optional_rec_type_t co_type;
+
+ /**
+ * sizeof of the 'valid' field in the union. This field is not used if
+ * @co_convert is specified.
+ */
+ size_t co_len;
+
+ union {
+ glusterfs_fop_t co_fop;
+ struct changelog_entry_fields co_entry;
+ };
+} changelog_opt_t;
+
+/* size of one optional record slot */
+#define CHANGELOG_OPT_RECORD_LEN sizeof (changelog_opt_t)
+
+/**
+ * helper routines
+ */
+
+void
+changelog_thread_cleanup (xlator_t *this, pthread_t thr_id);
+inline void *
+changelog_get_usable_buffer (changelog_local_t *local);
+inline void
+changelog_set_usable_record_and_length (changelog_local_t *local,
+                                        size_t len, int xr);
+void
+changelog_local_cleanup (xlator_t *xl, changelog_local_t *local);
+changelog_local_t *
+changelog_local_init (xlator_t *this, inode_t *inode, uuid_t gfid,
+                      int xtra_records, gf_boolean_t update_flag);
+int
+changelog_start_next_change (xlator_t *this,
+                             changelog_priv_t *priv,
+                             unsigned long ts, gf_boolean_t finale);
+int
+changelog_open (xlator_t *this, changelog_priv_t *priv);
+int
+changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last);
+int
+changelog_inject_single_event (xlator_t *this,
+                               changelog_priv_t *priv,
+                               changelog_log_data_t *cld);
+/* "(void)" makes this a real prototype; "()" leaves the arguments unchecked */
+inline size_t
+changelog_entry_length (void);
+inline int
+changelog_write (int fd, char *buffer, size_t len);
+int
+changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len);
+inline int
+changelog_handle_change (xlator_t *this,
+                         changelog_priv_t *priv, changelog_log_data_t *cld);
+/* dispatch a change of @type for @local, subject to inode version checks */
+inline void
+changelog_update (xlator_t *this, changelog_priv_t *priv,
+                  changelog_local_t *local, changelog_log_type type);
+/* thread entry points; @data is the changelog_priv_t */
+void *
+changelog_rollover (void *data);
+void *
+changelog_fsync_thread (void *data);
+int
+changelog_forget (xlator_t *this, inode_t *inode);
+
+/* macros */
+
+/**
+ * Unwind @fop on @frame and release the changelog local(s) attached to it.
+ *
+ * frame->local is detached before the unwind. The chained ->prev_entry is
+ * released BEFORE the local itself: changelog_local_cleanup() is handed
+ * ownership of the local, so ->prev_entry must not be read from it after
+ * that call (doing so would touch released memory if the cleanup frees
+ * the local).
+ */
+#define CHANGELOG_STACK_UNWIND(fop, frame, params ...) do {             \
+                changelog_local_t *__local = NULL;                      \
+                xlator_t          *__xl    = NULL;                      \
+                if (frame) {                                            \
+                        __local      = frame->local;                    \
+                        __xl         = frame->this;                     \
+                        frame->local = NULL;                            \
+                }                                                       \
+                STACK_UNWIND_STRICT (fop, frame, params);               \
+                if (__local && __local->prev_entry)                     \
+                        changelog_local_cleanup (__xl,                  \
+                                                 __local->prev_entry);  \
+                changelog_local_cleanup (__xl, __local);                \
+        } while (0)
+
+/* take a reference on @iobuf unless it is NULL */
+#define CHANGELOG_IOBUF_REF(iobuf) do { \
+ if (iobuf) \
+ iobuf_ref (iobuf); \
+ } while (0)
+
+/* drop a reference on @iobuf unless it is NULL */
+#define CHANGELOG_IOBUF_UNREF(iobuf) do { \
+ if (iobuf) \
+ iobuf_unref (iobuf); \
+ } while (0)
+
+/**
+ * Copy @len bytes from @val to @buffer at offset @off, then advance @off by
+ * @len. @off must be an lvalue; arguments are parenthesized so expressions
+ * such as "base + 1" can be passed safely.
+ */
+#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) do {       \
+                memcpy ((buffer) + (off), (val), (len));        \
+                (off) += (len);                                 \
+        } while (0)
+
+/**
+ * Increment the changelog version of every type in @slice. No locking is
+ * done here; the rollover path invokes it with priv->lock held.
+ */
+#define SLICE_VERSION_UPDATE(slice) do {                        \
+                int i = 0;                                      \
+                for (; i < CHANGELOG_MAX_TYPE; i++) {           \
+                        (slice)->changelog_version[i]++;        \
+                }                                               \
+        } while (0)
+
+/**
+ * Fill optional record @co as a fop-number record holding @fop, with
+ * @converter as its convert routine and no free routine, and add
+ * sizeof (fop) to @xlen.
+ */
+#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) do { \
+ co->co_convert = converter; \
+ co->co_free = NULL; \
+ co->co_type = CHANGELOG_OPT_REC_FOP; \
+ co->co_fop = fop; \
+ xlen += sizeof (fop); \
+ } while (0)
+
+/**
+ * Fill optional record @co as an entry record: copy @pargfid, duplicate
+ * @bname with gf_strdup() and add the record length to @xlen. Jumps to
+ * @label if the duplication fails (in which case @xlen is left untouched).
+ */
+#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, \
+ converter, freefn, xlen, label) \
+ do { \
+ co->co_convert = converter; \
+ co->co_free = freefn; \
+ co->co_type = CHANGELOG_OPT_REC_ENTRY; \
+ uuid_copy (co->co_entry.cef_uuid, pargfid); \
+ co->co_entry.cef_bname = gf_strdup(bname); \
+ if (!co->co_entry.cef_bname) \
+ goto label; \
+ xlen += (UUID_CANONICAL_FORM_LEN + strlen (bname)); \
+ } while (0)
+
+/* initialise @local via changelog_local_init() with update_flag _gf_false */
+#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \
+ local = changelog_local_init (this, inode, gfid, xrec, _gf_false)
+
+/* same as CHANGELOG_INIT, but passes update_flag _gf_true (used by entry
+ * fops such as rmdir, which do not undergo inode version checking) */
+#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \
+ local = changelog_local_init (this, inode, gfid, xrec, _gf_true)
+
+/* jump to @label when changelog is inactive or the fop comes from the
+ * rebalance (defrag) process */
+#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) do { \
+ if (!priv->active) \
+ goto label; \
+ /* ignore rebalance process's activity. */ \
+ if (frame->root->pid == GF_CLIENT_PID_DEFRAG) \
+ goto label; \
+ } while (0)
+
+/* ignore internal fops: jump to @label when @dict is non-NULL and carries
+ * GLUSTERFS_INTERNAL_FOP_KEY */
+#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(dict, label) do { \
+ if (dict && dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
+/**
+ * Jump to @label when changelog is inactive or @cond holds. @cond is
+ * parenthesized so that low-precedence expressions (e.g. "a ? b : c")
+ * cannot re-associate with the "||".
+ */
+#define CHANGELOG_COND_GOTO(priv, cond, label) do {     \
+                if (!(priv)->active || (cond))          \
+                        goto label;                     \
+        } while (0)
+
+#endif /* _CHANGELOG_HELPERS_H */
diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h
new file mode 100644
index 000000000..d72464eab
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-mem-types.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MEM_TYPES_H
+#define _CHANGELOG_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/* memory accounting types of the changelog translator, numbered right
+ * after the common types (gf_common_mt_end) */
+enum gf_changelog_mem_types {
+ gf_changelog_mt_priv_t = gf_common_mt_end + 1,
+ gf_changelog_mt_str_t = gf_common_mt_end + 2,
+ gf_changelog_mt_batch_t = gf_common_mt_end + 3,
+ gf_changelog_mt_rt_t = gf_common_mt_end + 4, /* changelog_rt_t */
+ gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5, /* changelog_inode_ctx_t */
+ gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 6,
+ gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 7,
+ gf_changelog_mt_libgfchangelog_dirent_t = gf_common_mt_end + 8,
+ gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 9,
+ gf_changelog_mt_end
+};
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h
new file mode 100644
index 000000000..0712a3771
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-misc.h
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MISC_H
+#define _CHANGELOG_MISC_H
+
+#include "glusterfs.h"
+#include "common-utils.h"
+
+/* number of fop-triggered changelog types (data, metadata, entry) */
+#define CHANGELOG_MAX_TYPE 3
+#define CHANGELOG_FILE_NAME "CHANGELOG"
+
+#define CHANGELOG_VERSION_MAJOR 1
+#define CHANGELOG_VERSION_MINOR 0
+
+/* socket path template; %s is filled by CHANGELOG_MAKE_SOCKET_PATH */
+#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/changelog-%s.sock"
+
+/**
+ * header starts with the version and the format of the changelog.
+ * 'version' is not of much use for now.
+ */
+#define CHANGELOG_HEADER \
+ "GlusterFS Changelog | version: v%d.%d | encoding : %d\n"
+
+/**
+ * Build the per-brick socket path into @sockpath (at most @len bytes):
+ * CHANGELOG_UNIX_SOCK with its %s replaced by the md5_wrapper() output
+ * for @brick_path.
+ */
+#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) do { \
+ char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \
+ md5_wrapper((unsigned char *) brick_path, \
+ strlen(brick_path), \
+ md5_sum); \
+ (void) snprintf (sockpath, len, \
+ CHANGELOG_UNIX_SOCK, md5_sum); \
+ } while (0)
+
+/**
+ * ... used by libgfchangelog.
+ *
+ * Read the header line of the changelog open on @fd into @buffer (at most
+ * @len bytes, as per fgets()), store the length of that line in @enc_len
+ * and parse the encoding out of it into @enc.
+ *
+ * @enc is set to -1 up front and stays -1 when @fd cannot be dup()'ed or
+ * fdopen()'ed, when nothing can be read, or when the line does not match
+ * CHANGELOG_HEADER up to the encoding field. @enc and @enc_len must be
+ * lvalues. The stream works on a dup() of @fd, so @fd itself stays open.
+ */
+#define CHANGELOG_GET_ENCODING(fd, buffer, len, enc, enc_len) do {      \
+                FILE *fp;                                               \
+                int   fd_dup, maj, min;                                 \
+                                                                        \
+                enc = -1;                                               \
+                fd_dup = dup (fd);                                      \
+                                                                        \
+                if (fd_dup != -1) {                                     \
+                        fp = fdopen (fd_dup, "r");                      \
+                        if (fp) {                                       \
+                                if (fgets (buffer, len, fp)) {          \
+                                        enc_len = strlen (buffer);      \
+                                        sscanf (buffer,                 \
+                                                CHANGELOG_HEADER,       \
+                                                &maj, &min, &enc);      \
+                                }                                       \
+                                fclose (fp);                            \
+                        } else {                                        \
+                                close (fd_dup);                         \
+                        }                                               \
+                }                                                       \
+        } while (0)
+
+/**
+ * everything after 'CHANGELOG_TYPE_ENTRY' is an internal type
+ * (ie. none of the fops trigger this type of event), hence
+ * CHANGELOG_MAX_TYPE = 3
+ */
+typedef enum {
+ CHANGELOG_TYPE_DATA = 0,
+ CHANGELOG_TYPE_METADATA,
+ CHANGELOG_TYPE_ENTRY,
+ CHANGELOG_TYPE_ROLLOVER, /* internal */
+ CHANGELOG_TYPE_FSYNC, /* internal: injected by changelog_fsync_thread */
+} changelog_log_type;
+
+/* operation modes - only RT is defined for now */
+typedef enum {
+ CHANGELOG_MODE_RT = 0,
+} changelog_mode_t;
+
+/* encoder types; MIN and MAX are exclusive bounds used by
+ * CHANGELOG_VALID_ENCODING, not encodings themselves */
+
+typedef enum {
+ CHANGELOG_ENCODE_MIN = 0,
+ CHANGELOG_ENCODE_BINARY,
+ CHANGELOG_ENCODE_ASCII,
+ CHANGELOG_ENCODE_MAX,
+} changelog_encoder_t;
+
+/* true when @enc lies strictly between CHANGELOG_ENCODE_MIN and
+ * CHANGELOG_ENCODE_MAX (note: @enc is evaluated twice) */
+#define CHANGELOG_VALID_ENCODING(enc)                                   \
+        ((enc) > CHANGELOG_ENCODE_MIN && (enc) < CHANGELOG_ENCODE_MAX)
+
+/* type predicates; arguments are parenthesized for use with expressions */
+#define CHANGELOG_TYPE_IS_ENTRY(type) ((type) == CHANGELOG_TYPE_ENTRY)
+#define CHANGELOG_TYPE_IS_ROLLOVER(type) ((type) == CHANGELOG_TYPE_ROLLOVER)
+#define CHANGELOG_TYPE_IS_FSYNC(type) ((type) == CHANGELOG_TYPE_FSYNC)
+
+#endif /* _CHANGELOG_MISC_H */
diff --git a/xlators/features/changelog/src/changelog-notifier.c b/xlators/features/changelog/src/changelog-notifier.c
new file mode 100644
index 000000000..1f8b31253
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-notifier.c
@@ -0,0 +1,314 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-notifier.h"
+
+#include <pthread.h>
+
+/* mark client slot @i as free (-1) */
+static inline void
+changelog_notify_clear_fd (changelog_notify_t *cn, int i)
+{
+        cn->client_fd[i] = -1;
+}
+
+/* remember @fd in client slot @i */
+static inline void
+changelog_notify_save_fd (changelog_notify_t *cn, int i, int fd)
+{
+        cn->client_fd[i] = fd;
+}
+
+/**
+ * Store @fd in the first free client slot.
+ *
+ * Returns 0 on success, -1 (after logging a warning) when all
+ * CHANGELOG_MAX_CLIENTS slots are taken; @fd is left untouched then.
+ */
+static int
+changelog_notify_insert_fd (xlator_t *this, changelog_notify_t *cn, int fd)
+{
+        int slot = 0;
+
+        while (slot < CHANGELOG_MAX_CLIENTS && cn->client_fd[slot] != -1)
+                slot++;
+
+        if (slot < CHANGELOG_MAX_CLIENTS) {
+                changelog_notify_save_fd (cn, slot, fd);
+                return 0;
+        }
+
+        /**
+         * this case should not be hit as listen() would limit
+         * the number of completely established connections.
+         */
+        gf_log (this->name, GF_LOG_WARNING,
+                "hit max client limit (%d)", CHANGELOG_MAX_CLIENTS);
+
+        return -1;
+}
+
+/**
+ * Rebuild @rset with the listening socket, the pipe's read end and every
+ * connected client, and store (highest descriptor + 1) in *@maxfd, i.e.
+ * the value to pass as select()'s first argument.
+ */
+static void
+changelog_notify_fill_rset (changelog_notify_t *cn, fd_set *rset, int *maxfd)
+{
+        int idx     = 0;
+        int highest = 0;
+
+        FD_ZERO (rset);
+
+        FD_SET (cn->socket_fd, rset);
+        FD_SET (cn->rfd, rset);
+        highest = max (cn->socket_fd, cn->rfd);
+
+        for (idx = 0; idx < CHANGELOG_MAX_CLIENTS; idx++) {
+                if (cn->client_fd[idx] == -1)
+                        continue;
+
+                FD_SET (cn->client_fd[idx], rset);
+                highest = max (highest, cn->client_fd[idx]);
+        }
+
+        *maxfd = highest + 1;
+}
+
+/**
+ * Write @len bytes of @path to every connected client. A client whose
+ * write fails is closed and its slot freed.
+ *
+ * Returns 0 when every write succeeded, -1 when at least one failed.
+ */
+static int
+changelog_notify_client (changelog_notify_t *cn, char *path, ssize_t len)
+{
+        int idx    = 0;
+        int status = 0;
+
+        for (idx = 0; idx < CHANGELOG_MAX_CLIENTS; idx++) {
+                if (cn->client_fd[idx] == -1)
+                        continue;
+
+                if (!changelog_write (cn->client_fd[idx], path, len))
+                        continue;
+
+                /* write failed: drop this client */
+                status = -1;
+                close (cn->client_fd[idx]);
+                changelog_notify_clear_fd (cn, idx);
+        }
+
+        return status;
+}
+
+/* reset the notifier state: no listening socket, all client slots free
+ * (cn->rfd is not touched here) */
+static void
+changelog_notifier_init (changelog_notify_t *cn)
+{
+        int slot = 0;
+
+        cn->socket_fd = -1;
+
+        while (slot < CHANGELOG_MAX_CLIENTS) {
+                changelog_notify_clear_fd (cn, slot);
+                slot++;
+        }
+}
+
+/* close every connected client and free its slot */
+static void
+changelog_close_client_conn (changelog_notify_t *cn)
+{
+        int slot = 0;
+
+        for (slot = 0; slot < CHANGELOG_MAX_CLIENTS; slot++) {
+                if (cn->client_fd[slot] != -1) {
+                        close (cn->client_fd[slot]);
+                        changelog_notify_clear_fd (cn, slot);
+                }
+        }
+}
+
+/**
+ * Cleanup handler of the notifier thread (registered through
+ * pthread_cleanup_push()); @arg is the changelog_notify_t.
+ *
+ * Closes all client connections, the listening socket (if open) and the
+ * pipe's read end, then unlinks the socket file. A failed unlink is only
+ * logged.
+ */
+static void
+changelog_notifier_cleanup (void *arg)
+{
+        changelog_notify_t *cn = NULL;
+
+        cn = (changelog_notify_t *) arg;
+
+        changelog_close_client_conn (cn);
+
+        if (cn->socket_fd != -1)
+                close (cn->socket_fd);
+
+        /* NOTE(review): a zero rfd is treated as "not set" here, while
+         * socket_fd uses -1 for that - confirm how rfd is initialised */
+        if (cn->rfd)
+                close (cn->rfd);
+
+        if (unlink (cn->sockpath))
+                gf_log ("", GF_LOG_WARNING,
+                        "could not unlink changelog socket file"
+                        " %s (reason: %s)", cn->sockpath, strerror (errno));
+}
+
+/**
+ * Notifier thread entry point; @data is the changelog_priv_t.
+ *
+ * Creates and listens on the per-brick unix domain socket, then loops on
+ * select() over three kinds of descriptors:
+ *  - the listening socket: new clients are accept()'ed into a free slot;
+ *  - the pipe's read end (cn->rfd): a NUL terminated changelog file name
+ *    is read, prefixed with priv->changelog_dir and sent to all clients;
+ *  - client sockets: EOF/error means disconnect, any data is discarded.
+ *
+ * changelog_notifier_cleanup() is registered as cleanup handler and runs
+ * on the 'cleanup' exit path and on pthread_exit(). Always returns NULL.
+ */
+void *
+changelog_notifier (void *data)
+{
+        int                 i                 = 0;
+        int                 fd                = 0;
+        int                 max_fd            = 0;
+        int                 len               = 0;
+        ssize_t             readlen           = 0;
+        xlator_t           *this              = NULL;
+        changelog_priv_t   *priv              = NULL;
+        changelog_notify_t *cn                = NULL;
+        struct sockaddr_un  local             = {0,};
+        char                path[PATH_MAX]    = {0,};
+        char                abspath[PATH_MAX] = {0,};
+
+        char buffer;
+        fd_set rset;
+
+        priv = (changelog_priv_t *) data;
+
+        cn = &priv->cn;
+        this = cn->this;
+
+        pthread_cleanup_push (changelog_notifier_cleanup, cn);
+
+        changelog_notifier_init (cn);
+
+        cn->socket_fd = socket (AF_UNIX, SOCK_STREAM, 0);
+        if (cn->socket_fd < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "changelog socket error (reason: %s)",
+                        strerror (errno));
+                /* NOTE(review): this leaves the cleanup push/pop scope
+                 * through a goto, bypassing pthread_cleanup_pop() -
+                 * confirm this is acceptable on the target platforms */
+                goto out;
+        }
+
+        CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick,
+                                    cn->sockpath, PATH_MAX);
+        if (unlink (cn->sockpath) < 0) {
+                if (errno != ENOENT) {
+                        /* report the real path, not the format template */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Could not unlink changelog socket file (%s)"
+                                " (reason: %s)",
+                                cn->sockpath, strerror (errno));
+                        goto cleanup;
+                }
+        }
+
+        /* sockpath is PATH_MAX bytes but sun_path is much smaller */
+        if (strlen (cn->sockpath) >= sizeof (local.sun_path)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "changelog socket path too long (%s)", cn->sockpath);
+                goto cleanup;
+        }
+
+        local.sun_family = AF_UNIX;
+        strcpy (local.sun_path, cn->sockpath);
+
+        len = strlen (local.sun_path) + sizeof (local.sun_family);
+
+        /* bind to the unix domain socket */
+        if (bind (cn->socket_fd, (struct sockaddr *) &local, len) < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Could not bind to changelog socket (reason: %s)",
+                        strerror (errno));
+                goto cleanup;
+        }
+
+        /* listen for incoming connections */
+        if (listen (cn->socket_fd, CHANGELOG_MAX_CLIENTS) < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "listen() error on changelog socket (reason: %s)",
+                        strerror (errno));
+                goto cleanup;
+        }
+
+        /**
+         * simple select() on all to-be-read file descriptors. This method
+         * though old school works pretty well when you have a handful of
+         * fd's to be watched (clients).
+         *
+         * Future TODO: move this to epoll based notification facility if
+         * number of clients increase.
+         */
+        for (;;) {
+                changelog_notify_fill_rset (cn, &rset, &max_fd);
+
+                if (select (max_fd, &rset, NULL, NULL, NULL) < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "select() returned -1 (reason: %s)",
+                                strerror (errno));
+                        sleep (2);
+                        continue;
+                }
+
+                if (FD_ISSET (cn->socket_fd, &rset)) {
+                        fd = accept (cn->socket_fd, NULL, NULL);
+                        if (fd < 0) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "accept error on changelog socket"
+                                        " (reason: %s)", strerror (errno));
+                        } else if (changelog_notify_insert_fd (this, cn, fd)) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "hit max client limit");
+                                /* no slot tracks this connection: close it
+                                 * or the descriptor is leaked */
+                                close (fd);
+                        }
+                }
+
+                if (FD_ISSET (cn->rfd, &rset)) {
+                        /**
+                         * read changelog filename and notify all connected
+                         * clients.
+                         */
+                        readlen = 0;
+                        while (readlen < PATH_MAX) {
+                                len = read (cn->rfd, &path[readlen++], 1);
+                                if (len == -1) {
+                                        break;
+                                }
+
+                                if (len == 0) {
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "rollover thread sent EOF"
+                                                " on pipe - possibly a crash.");
+                                        /* be blunt and close all connections */
+                                        pthread_exit(NULL);
+                                }
+
+                                if (path[readlen - 1] == '\0')
+                                        break;
+                        }
+
+                        /* should we close all client connections here too? */
+                        if (len < 0 || readlen == PATH_MAX) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Could not get pathname from rollover"
+                                        " thread or pathname too long");
+                                goto process_rest;
+                        }
+
+                        (void) snprintf (abspath, PATH_MAX,
+                                         "%s/%s", priv->changelog_dir, path);
+                        if (changelog_notify_client (cn, abspath,
+                                                     strlen (abspath) + 1))
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "could not notify some clients with new"
+                                        " changelogs");
+                }
+
+        process_rest:
+                for (i = 0; i < CHANGELOG_MAX_CLIENTS; i++) {
+                        if ( (fd = cn->client_fd[i]) == -1 )
+                                continue;
+
+                        if (FD_ISSET (fd, &rset)) {
+                                /**
+                                 * the only data we accept from the client is a
+                                 * disconnect. Anything else is treated as bogus
+                                 * and is discarded (with a warning).
+                                 */
+                                if ( (readlen = read (fd, &buffer, 1)) <= 0 ) {
+                                        close (fd);
+                                        changelog_notify_clear_fd (cn, i);
+                                } else {
+                                        /* discard data and log */
+                                        gf_log (this->name, GF_LOG_WARNING,
+                                                "misbehaving changelog client");
+                                }
+                        }
+                }
+
+        }
+
+ cleanup:;
+        pthread_cleanup_pop (1);
+
+ out:
+        return NULL;
+}
diff --git a/xlators/features/changelog/src/changelog-notifier.h b/xlators/features/changelog/src/changelog-notifier.h
new file mode 100644
index 000000000..55e728356
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-notifier.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_NOTIFIER_H
+#define _CHANGELOG_NOTIFIER_H
+
+#include "changelog-helpers.h"
+
+void *
+changelog_notifier (void *data);
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c
new file mode 100644
index 000000000..c147f68ca
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.c
@@ -0,0 +1,72 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+
+#include "changelog-rt.h"
+#include "changelog-mem-types.h"
+
+/**
+ * Constructor of the RT dispatcher: allocate its private data, initialise
+ * the lock in it and hook both the data and changelog_rt_enqueue() into
+ * @cd. Returns 0 on success, -1 when the allocation fails.
+ */
+int
+changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd)
+{
+        changelog_rt_t *rt = GF_CALLOC (1, sizeof (*rt),
+                                        gf_changelog_mt_rt_t);
+
+        if (rt == NULL)
+                return -1;
+
+        LOCK_INIT (&rt->lock);
+
+        cd->dispatchfn = &changelog_rt_enqueue;
+        cd->cd_data    = rt;
+
+        return 0;
+}
+
+/**
+ * Destructor of the RT dispatcher: destroy the lock and free the private
+ * data stored in @cd by changelog_rt_init(). Always returns 0.
+ */
+int
+changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd)
+{
+        changelog_rt_t *rt = cd->cd_data;
+
+        LOCK_DESTROY (&rt->lock);
+        GF_FREE (rt);
+
+        return 0;
+}
+
+/**
+ * Dispatch function of the RT dispatcher. With the dispatcher lock
+ * (@cbatch is the changelog_rt_t) held, @cld_0 is handled first and, only
+ * if that succeeded and @cld_1 is non-NULL, @cld_1 afterwards.
+ *
+ * Returns the result of the last changelog_handle_change() call made.
+ */
+int
+changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch,
+                      changelog_log_data_t *cld_0, changelog_log_data_t *cld_1)
+{
+        changelog_rt_t *rt     = (changelog_rt_t *) cbatch;
+        int             status = 0;
+
+        LOCK (&rt->lock);
+
+        status = changelog_handle_change (this, priv, cld_0);
+        if (status == 0 && cld_1 != NULL)
+                status = changelog_handle_change (this, priv, cld_1);
+
+        UNLOCK (&rt->lock);
+
+        return status;
+}
diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h
new file mode 100644
index 000000000..1fc2bbc5b
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_RT_H
+#define _CHANGELOG_RT_H
+
+#include "locking.h"
+#include "timer.h"
+#include "pthread.h"
+
+#include "changelog-helpers.h"
+
+/* private data of the RT dispatcher: the lock taken by
+ * changelog_rt_enqueue() around the handling of a change */
+typedef struct changelog_rt {
+ gf_lock_t lock;
+} changelog_rt_t;
+
+/* constructor / destructor of the RT dispatcher (see cb_bootstrap) */
+int
+changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd);
+int
+changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd);
+/* dispatch function installed by changelog_rt_init(); @cld_1 may be NULL */
+int
+changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch,
+ changelog_log_data_t *cld_0, changelog_log_data_t *cld_1);
+
+#endif /* _CHANGELOG_RT_H */
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
new file mode 100644
index 000000000..cea0e8c70
--- /dev/null
+++ b/xlators/features/changelog/src/changelog.c
@@ -0,0 +1,1477 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+#include "iobuf.h"
+
+#include "changelog-rt.h"
+
+#include "changelog-encoders.h"
+#include "changelog-mem-types.h"
+
+#include <pthread.h>
+
+#include "changelog-notifier.h"
+/* Constructor/destructor table, one entry per operation mode.
+ *
+ * init() indexes this array with priv->op_mode and asserts that the
+ * entry's .mode equals that index, so entries must stay in mode order.
+ */
+static struct changelog_bootstrap
+cb_bootstrap[] = {
+ {
+ .mode = CHANGELOG_MODE_RT,
+ .ctor = changelog_rt_init,
+ .dtor = changelog_rt_fini,
+ },
+};
+
+/* Entry operations - TYPE III */
+
+/**
+ * entry operations do not undergo inode version checking.
+ */
+
+/* {{{ */
+
+/* rmdir
+ *
+ * changelog_rmdir_cbk: callback for the rmdir wound by changelog_rmdir().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_ENTRY.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+/* changelog_rmdir: rmdir fop.
+ *
+ * Sets up frame->local with CHANGELOG_INIT_NOCHECK (gfid taken from
+ * loc->inode->gfid, 2 extra records), stores the fop number
+ * (frame->root->op) in the first record and the parent gfid plus name
+ * (loc->pargfid, loc->name) in the second, then records the accumulated
+ * length. CHANGELOG_NOT_ACTIVE_THEN_GOTO, CHANGELOG_FILL_ENTRY and a NULL
+ * usable buffer all lead to the wind label; the fop is wound to the
+ * first child in every case with changelog_rmdir_cbk as callback.
+ *
+ * NOTE(review): unlike changelog_create(), frame->local is not checked
+ * for NULL before use - confirm changelog_get_usable_buffer() copes.
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflags, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local,
+ NULL, loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_rmdir_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->rmdir,
+ loc, xflags, xdata);
+ return 0;
+}
+
+/* unlink
+ *
+ * changelog_unlink_cbk: callback for the unlink wound by
+ * changelog_unlink().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_ENTRY.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+/* changelog_unlink: unlink fop.
+ *
+ * Sets up frame->local with CHANGELOG_INIT_NOCHECK (gfid taken from
+ * loc->inode->gfid, 2 extra records), stores the fop number
+ * (frame->root->op) in the first record and the parent gfid plus name
+ * (loc->pargfid, loc->name) in the second, then records the accumulated
+ * length. CHANGELOG_NOT_ACTIVE_THEN_GOTO, CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO
+ * (given xdata), CHANGELOG_FILL_ENTRY and a NULL usable buffer all lead
+ * to the wind label; the fop is wound to the first child in every case
+ * with changelog_unlink_cbk as callback.
+ *
+ * NOTE(review): unlike changelog_create(), frame->local is not checked
+ * for NULL before use - confirm changelog_get_usable_buffer() copes.
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflags, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (xdata, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_unlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+ loc, xflags, xdata);
+ return 0;
+}
+
+/* rename
+ *
+ * changelog_rename_cbk: callback for the rename wound by
+ * changelog_rename().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_ENTRY.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_rename_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, struct iatt *preoldparent,
+ struct iatt *postoldparent, struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno,
+ buf, preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+/* changelog_rename: rename fop.
+ *
+ * Sets up frame->local with CHANGELOG_INIT_NOCHECK (gfid taken from
+ * oldloc->inode->gfid, 3 extra records: fop number, old entry, new
+ * entry). The old and new entries are filled from (pargfid, name) of
+ * oldloc and newloc respectively, then the accumulated length is
+ * recorded. CHANGELOG_NOT_ACTIVE_THEN_GOTO, CHANGELOG_FILL_ENTRY and a
+ * NULL usable buffer all lead to the wind label; the fop is wound to the
+ * first child in every case with changelog_rename_cbk as callback.
+ *
+ * NOTE(review): unlike changelog_create(), frame->local is not checked
+ * for NULL before use - confirm changelog_get_usable_buffer() copes.
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ /* 3 == fop + oldloc + newloc */
+ CHANGELOG_INIT_NOCHECK (this, frame->local,
+ NULL, oldloc->inode->gfid, 3);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, oldloc->pargfid, oldloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 3);
+
+ wind:
+ STACK_WIND (frame, changelog_rename_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+/* link
+ *
+ * changelog_link_cbk: callback for the link wound by changelog_link().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_ENTRY.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_link_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (link, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+/* changelog_link: link fop.
+ *
+ * Sets up frame->local with CHANGELOG_INIT_NOCHECK (gfid taken from
+ * oldloc->gfid, 2 extra records), stores the fop number
+ * (frame->root->op) in the first record and the new name's parent gfid
+ * plus name (newloc->pargfid, newloc->name) in the second, then records
+ * the accumulated length. CHANGELOG_NOT_ACTIVE_THEN_GOTO,
+ * CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (given xdata), CHANGELOG_FILL_ENTRY
+ * and a NULL usable buffer all lead to the wind label; the fop is wound
+ * to the first child in every case with changelog_link_cbk as callback.
+ *
+ * NOTE(review): unlike changelog_create(), frame->local is not checked
+ * for NULL before use - confirm changelog_get_usable_buffer() copes.
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_link (call_frame_t *frame,
+ xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (xdata, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, oldloc->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_link_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+/* mkdir
+ *
+ * changelog_mkdir_cbk: callback for the mkdir wound by changelog_mkdir().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_ENTRY.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_mkdir_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+/* changelog_mkdir: mkdir fop.
+ *
+ * The gfid of the directory to be created is read from the "gfid-req"
+ * key of xdata; when that lookup fails a debug message is logged and the
+ * fop is wound without recording anything. Otherwise frame->local is set
+ * up with CHANGELOG_INIT_NOCHECK (that gfid, 2 extra records), the fop
+ * number (frame->root->op) goes into the first record and the parent
+ * gfid plus name (loc->pargfid, loc->name) into the second, and the
+ * accumulated length is recorded. CHANGELOG_NOT_ACTIVE_THEN_GOTO,
+ * CHANGELOG_FILL_ENTRY and a NULL usable buffer also lead to the wind
+ * label; the fop is wound to the first child in every case with
+ * changelog_mkdir_cbk as callback.
+ *
+ * NOTE(review): unlike changelog_create(), frame->local is not checked
+ * for NULL before use - confirm changelog_get_usable_buffer() copes.
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_mkdir_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+}
+
+/* symlink
+ *
+ * changelog_symlink_cbk: callback for the symlink wound by
+ * changelog_symlink().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_ENTRY.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_symlink_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (symlink, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+/* changelog_symlink: symlink fop.
+ *
+ * The gfid of the link to be created is read from the "gfid-req" key of
+ * xdata; when that lookup fails a debug message is logged and the fop is
+ * wound without recording anything. Otherwise frame->local is set up
+ * with CHANGELOG_INIT_NOCHECK (that gfid, 2 extra records), the fop
+ * number (frame->root->op) goes into the first record and the parent
+ * gfid plus name (loc->pargfid, loc->name) into the second, and the
+ * accumulated length is recorded. CHANGELOG_NOT_ACTIVE_THEN_GOTO,
+ * CHANGELOG_FILL_ENTRY and a NULL usable buffer also lead to the wind
+ * label; the fop is wound to the first child in every case with
+ * changelog_symlink_cbk as callback.
+ *
+ * NOTE(review): unlike changelog_create(), frame->local is not checked
+ * for NULL before use - confirm changelog_get_usable_buffer() copes.
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkname, loc_t *loc,
+ mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ size_t xtra_len = 0;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_symlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->symlink,
+ linkname, loc, umask, xdata);
+ return 0;
+}
+
+/* mknod
+ *
+ * changelog_mknod_cbk: callback for the mknod wound by changelog_mknod().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_ENTRY.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_mknod_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (mknod, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+/* changelog_mknod: mknod fop.
+ *
+ * The gfid of the node to be created is read from the "gfid-req" key of
+ * xdata; when that lookup fails a debug message is logged and the fop is
+ * wound without recording anything. Otherwise frame->local is set up
+ * with CHANGELOG_INIT_NOCHECK (that gfid, 2 extra records), the fop
+ * number (frame->root->op) goes into the first record and the parent
+ * gfid plus name (loc->pargfid, loc->name) into the second, and the
+ * accumulated length is recorded. CHANGELOG_NOT_ACTIVE_THEN_GOTO,
+ * CHANGELOG_FILL_ENTRY and a NULL usable buffer also lead to the wind
+ * label; the fop is wound to the first child in every case with
+ * changelog_mknod_cbk as callback.
+ *
+ * NOTE(review): unlike changelog_create(), frame->local is not checked
+ * for NULL before use - confirm changelog_get_usable_buffer() copes.
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_mknod (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_mknod_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+ loc, mode, dev, umask, xdata);
+ return 0;
+}
+
+/* creat
+ *
+ * changelog_create_cbk: callback for the create wound by
+ * changelog_create().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_ENTRY.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_create_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (create, frame,
+ op_ret, op_errno, fd, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+/* changelog_create: create fop.
+ *
+ * The gfid of the file to be created is read from the "gfid-req" key of
+ * xdata; when that lookup fails a debug message is logged and the fop is
+ * wound without recording anything. Otherwise frame->local is set up
+ * with CHANGELOG_INIT_NOCHECK (that gfid, 2 extra records) and checked
+ * for NULL, the fop number (frame->root->op) goes into the first record
+ * and the parent gfid plus name (loc->pargfid, loc->name) into the
+ * second, and the accumulated length is recorded.
+ * CHANGELOG_NOT_ACTIVE_THEN_GOTO, CHANGELOG_FILL_ENTRY and a NULL usable
+ * buffer also lead to the wind label; the fop is wound to the first
+ * child in every case with changelog_create_cbk as callback.
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ changelog_opt_t *co = NULL;
+ changelog_priv_t *priv = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ /* init with two extra records */
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+ if (!frame->local)
+ goto wind;
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_create_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
+
+/* }}} */
+
+
+/* Metadata modification fops - TYPE II */
+
+/* {{{ */
+
+/* {f}setattr
+ *
+ * changelog_fsetattr_cbk: callback for the fsetattr wound by
+ * changelog_fsetattr().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as
+ * CHANGELOG_TYPE_METADATA. Either way the callback arguments are passed
+ * on unchanged through CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_fsetattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ preop_stbuf, postop_stbuf, xdata);
+
+ return 0;
+
+
+}
+/* changelog_fsetattr: fsetattr fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using fd->inode and its
+ * gfid (0 extra records), then winds fsetattr to the first child with
+ * changelog_fsetattr_cbk as callback. CHANGELOG_NOT_ACTIVE_THEN_GOTO is
+ * given the wind label, so the wind happens on that path as well
+ * (presumably taken when change-logging is inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_fsetattr (call_frame_t *frame,
+ xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_fsetattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+
+
+}
+/* changelog_setattr_cbk: callback for the setattr wound by
+ * changelog_setattr().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as
+ * CHANGELOG_TYPE_METADATA. Either way the callback arguments are passed
+ * on unchanged through CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+int32_t
+changelog_setattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+ preop_stbuf, postop_stbuf, xdata);
+
+ return 0;
+}
+/* changelog_setattr: setattr fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using loc->inode and its
+ * gfid (0 extra records), then winds setattr to the first child with
+ * changelog_setattr_cbk as callback. CHANGELOG_NOT_ACTIVE_THEN_GOTO is
+ * given the wind label, so the wind happens on that path as well
+ * (presumably taken when change-logging is inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_setattr (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_setattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+}
+
+/* {f}removexattr
+ *
+ * changelog_fremovexattr_cbk: callback for the fremovexattr wound by
+ * changelog_fremovexattr().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as
+ * CHANGELOG_TYPE_METADATA. Either way the callback arguments are passed
+ * on unchanged through CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_fremovexattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+/* changelog_fremovexattr: fremovexattr fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using fd->inode and its
+ * gfid (0 extra records), then winds fremovexattr to the first child
+ * with changelog_fremovexattr_cbk as callback.
+ * CHANGELOG_NOT_ACTIVE_THEN_GOTO is given the wind label, so the wind
+ * happens on that path as well (presumably taken when change-logging is
+ * inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_fremovexattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+}
+/* changelog_removexattr_cbk: callback for the removexattr wound by
+ * changelog_removexattr().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as
+ * CHANGELOG_TYPE_METADATA. Either way the callback arguments are passed
+ * on unchanged through CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+int32_t
+changelog_removexattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+/* changelog_removexattr: removexattr fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using loc->inode and its
+ * gfid (0 extra records), then winds removexattr to the first child with
+ * changelog_removexattr_cbk as callback. CHANGELOG_NOT_ACTIVE_THEN_GOTO
+ * is given the wind label, so the wind happens on that path as well
+ * (presumably taken when change-logging is inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_removexattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+}
+
+/* {f}setxattr
+ *
+ * changelog_setxattr_cbk: callback for the setxattr wound by
+ * changelog_setxattr().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as
+ * CHANGELOG_TYPE_METADATA. Either way the callback arguments are passed
+ * on unchanged through CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_setxattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+/* changelog_setxattr: setxattr fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using loc->inode and its
+ * gfid (0 extra records), then winds setxattr to the first child with
+ * changelog_setxattr_cbk as callback. CHANGELOG_NOT_ACTIVE_THEN_GOTO is
+ * given the wind label, so the wind happens on that path as well
+ * (presumably taken when change-logging is inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_setxattr (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_setxattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+/* changelog_fsetxattr_cbk: callback for the fsetxattr wound by
+ * changelog_fsetxattr().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as
+ * CHANGELOG_TYPE_METADATA. Either way the callback arguments are passed
+ * on unchanged through CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+int32_t
+changelog_fsetxattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+/* changelog_fsetxattr: fsetxattr fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using fd->inode and its
+ * gfid (0 extra records), then winds fsetxattr to the first child with
+ * changelog_fsetxattr_cbk as callback. CHANGELOG_NOT_ACTIVE_THEN_GOTO is
+ * given the wind label, so the wind happens on that path as well
+ * (presumably taken when change-logging is inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_fsetxattr (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_fsetxattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+/* }}} */
+
+
+/* Data modification fops - TYPE I */
+
+/* {{{ */
+
+/* {f}truncate()
+ *
+ * changelog_truncate_cbk: callback for the truncate wound by
+ * changelog_truncate().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_DATA.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+
+int32_t
+changelog_truncate_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (truncate, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+/* changelog_truncate: truncate fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using loc->inode and its
+ * gfid (0 extra records), then winds truncate to the first child with
+ * changelog_truncate_cbk as callback. CHANGELOG_NOT_ACTIVE_THEN_GOTO is
+ * given the wind label, so the wind happens on that path as well
+ * (presumably taken when change-logging is inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_truncate (call_frame_t *frame,
+ xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_truncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+}
+/* changelog_ftruncate_cbk: callback for the ftruncate wound by
+ * changelog_ftruncate().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret < 0 || !local) and
+ * the unwind label - presumably it skips the update when the fop failed
+ * or no local was set up (macro body not visible here). Otherwise the
+ * change is recorded through changelog_update() as CHANGELOG_TYPE_DATA.
+ * Either way the callback arguments are passed on unchanged through
+ * CHANGELOG_STACK_UNWIND and 0 is returned.
+ */
+int32_t
+changelog_ftruncate_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (ftruncate, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+/* changelog_ftruncate: ftruncate fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using fd->inode and its
+ * gfid (0 extra records), then winds ftruncate to the first child with
+ * changelog_ftruncate_cbk as callback. CHANGELOG_NOT_ACTIVE_THEN_GOTO is
+ * given the wind label, so the wind happens on that path as well
+ * (presumably taken when change-logging is inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_ftruncate (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_ftruncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
+ fd, offset, xdata);
+ return 0;
+}
+
+/* writev()
+ *
+ * changelog_writev_cbk: callback for the writev wound by
+ * changelog_writev().
+ *
+ * CHANGELOG_COND_GOTO is handed the condition (op_ret <= 0 || !local)
+ * and the unwind label. Note the "<=": unlike the other callbacks in
+ * this file a zero op_ret is part of the condition too. Presumably the
+ * macro skips the update when the condition holds (macro body not
+ * visible here). Otherwise the change is recorded through
+ * changelog_update() as CHANGELOG_TYPE_DATA. Either way the callback
+ * arguments are passed on unchanged through CHANGELOG_STACK_UNWIND and 0
+ * is returned.
+ */
+
+int32_t
+changelog_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret <= 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (writev, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+/* changelog_writev: writev fop.
+ *
+ * Sets up frame->local through CHANGELOG_INIT using fd->inode and its
+ * gfid (0 extra records), then winds writev to the first child with
+ * changelog_writev_cbk as callback. CHANGELOG_NOT_ACTIVE_THEN_GOTO is
+ * given the wind label, so the wind happens on that path as well
+ * (presumably taken when change-logging is inactive).
+ *
+ * Always returns 0.
+ */
+int32_t
+changelog_writev (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_writev_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->writev, fd, vector,
+ count, offset, flags, iobref, xdata);
+ return 0;
+}
+
+/* }}} */
+
+/**
+ * The
+ * - @init ()
+ * - @fini ()
+ * - @reconfigure ()
+ * ... and helper routines
+ */
+
+/**
+ * Translate the operation-mode string into priv->op_mode.
+ *
+ * Only a string starting with "realtime" is recognised; any other value
+ * leaves priv->op_mode untouched. Kept as a separate helper because more
+ * operation modes may be added in the future.
+ */
+static void
+changelog_assign_opmode (changelog_priv_t *priv, char *mode)
+{
+        if (strncmp (mode, "realtime", 8) != 0)
+                return;
+
+        priv->op_mode = CHANGELOG_MODE_RT;
+}
+
+/**
+ * Translate the encoding string into priv->encode_mode.
+ *
+ * A string starting with "binary" selects CHANGELOG_ENCODE_BINARY, one
+ * starting with "ascii" selects CHANGELOG_ENCODE_ASCII; anything else
+ * leaves priv->encode_mode untouched.
+ */
+static void
+changelog_assign_encoding (changelog_priv_t *priv, char *enc)
+{
+        if (strncmp (enc, "binary", 6) == 0) {
+                priv->encode_mode = CHANGELOG_ENCODE_BINARY;
+                return;
+        }
+
+        if (strncmp (enc, "ascii", 5) == 0)
+                priv->encode_mode = CHANGELOG_ENCODE_ASCII;
+}
+
+/**
+ * Stop whichever helper threads are currently recorded in @priv.
+ *
+ * The rollover thread is handled first, then the fsync thread. Each
+ * thread handle is passed to changelog_thread_cleanup() and then reset
+ * to 0 so that a repeated call is a no-op.
+ */
+static void
+changelog_cleanup_helper_threads (xlator_t *this, changelog_priv_t *priv)
+{
+        if (priv->cr.rollover_th != 0) {
+                changelog_thread_cleanup (this, priv->cr.rollover_th);
+                priv->cr.rollover_th = 0;
+        }
+
+        if (priv->cf.fsync_th != 0) {
+                changelog_thread_cleanup (this, priv->cf.fsync_th);
+                priv->cf.fsync_th = 0;
+        }
+}
+
+/**
+ * Start the helper threads.
+ *
+ * The rollover thread is always started. The fsync thread is started
+ * only when priv->fsync_interval is non-zero; if that second start
+ * fails, the helper threads started so far are cleaned up again.
+ *
+ * Returns 0 on success, otherwise the gf_thread_create() error.
+ */
+static int
+changelog_spawn_helper_threads (xlator_t *this, changelog_priv_t *priv)
+{
+        int status = 0;
+
+        priv->cr.this = this;
+        status = gf_thread_create (&priv->cr.rollover_th,
+                                   NULL, changelog_rollover, priv);
+        if (status != 0)
+                return status;
+
+        /* no periodic fsync requested: nothing more to start */
+        if (!priv->fsync_interval)
+                return 0;
+
+        priv->cf.this = this;
+        status = gf_thread_create (&priv->cf.fsync_th,
+                                   NULL, changelog_fsync_thread, priv);
+        if (status != 0)
+                changelog_cleanup_helper_threads (this, priv);
+
+        return status;
+}
+
+/**
+ * Stop the notifier thread and close the writer end of its pipe.
+ *
+ * Does nothing (and returns 0) when no notifier thread is recorded in
+ * @priv. Otherwise returns the result of close() on priv->wfd, logging
+ * an error when that close fails.
+ */
+static int
+changelog_cleanup_notifier (xlator_t *this, changelog_priv_t *priv)
+{
+        int status = 0;
+
+        if (!priv->cn.notify_th)
+                return 0;
+
+        changelog_thread_cleanup (this, priv->cn.notify_th);
+        priv->cn.notify_th = 0;
+
+        status = close (priv->wfd);
+        if (status != 0)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error closing writer end of notifier pipe"
+                        " (reason: %s)", strerror (errno));
+
+        return status;
+}
+
+/**
+ * Spawn the notifier thread; a no-op when it is already running.
+ *
+ * Creates a pipe, switches its write end to non-blocking mode, stores
+ * the write end in priv->wfd and the read end in priv->cn.rfd, and
+ * starts changelog_notifier() with @priv as its argument.
+ *
+ * On any failure after the pipe was created both descriptors are closed
+ * so they are not leaked; if the thread could not be started the
+ * descriptors and thread handle recorded in @priv are reset as well.
+ *
+ * Returns 0 on success (or when already running), non-zero on failure.
+ */
+static int
+changelog_spawn_notifier (xlator_t *this, changelog_priv_t *priv)
+{
+        int ret        = 0;
+        int flags      = 0;
+        int pipe_fd[2] = {-1, -1};
+
+        if (priv->cn.notify_th)
+                goto out; /* notifier thread already running */
+
+        ret = pipe (pipe_fd);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Cannot create pipe (reason: %s)", strerror (errno));
+                goto out;
+        }
+
+        /* writer is non-blocking */
+        flags = fcntl (pipe_fd[1], F_GETFL);
+        if (flags == -1) {
+                /* never feed the -1 error value back into F_SETFL */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to get pipe flags (reason: %s)",
+                        strerror (errno));
+                ret = -1;
+                goto close_pipe;
+        }
+
+        ret = fcntl (pipe_fd[1], F_SETFL, flags | O_NONBLOCK);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set O_NONBLOCK flag");
+                goto close_pipe;
+        }
+
+        priv->wfd = pipe_fd[1];
+
+        priv->cn.this = this;
+        priv->cn.rfd = pipe_fd[0];
+
+        ret = gf_thread_create (&priv->cn.notify_th,
+                                NULL, changelog_notifier, priv);
+        if (!ret)
+                goto out;
+
+        /* thread never started: forget what was recorded for it */
+        priv->cn.notify_th = 0;
+        priv->cn.rfd = -1;
+        priv->wfd = -1;
+
+ close_pipe:
+        close (pipe_fd[0]);
+        close (pipe_fd[1]);
+
+ out:
+        return ret;
+}
+
+/**
+ * Set up memory accounting for this translator.
+ *
+ * Returns -1 when @this is NULL, otherwise the result of
+ * xlator_mem_acct_init() called with gf_changelog_mt_end + 1; a warning
+ * is logged when that call fails.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int status = -1;
+
+        if (this == NULL)
+                return status;
+
+        status = xlator_mem_acct_init (this, gf_changelog_mt_end + 1);
+        if (status != 0)
+                gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+                        " init failed");
+
+        return status;
+}
+
+/**
+ * Bring the change-logging machinery up at translator init time.
+ *
+ * Records the start time of the first time slice, fills the type
+ * prefixes ("D ", "M ", "E ") and starts every changelog version at 1.
+ * When change-logging is inactive nothing else is done. Otherwise the
+ * notifier thread is spawned, an initial rollover event is injected so
+ * that logging starts with a fresh changelog file, and finally the
+ * helper threads are spawned.
+ *
+ * A failure of the injected rollover is reported instead of being
+ * overwritten by the helper-thread result, and on any failure the
+ * notifier thread is stopped again: init() frees @priv when this
+ * function fails, and the notifier thread was handed @priv.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+changelog_init (xlator_t *this, changelog_priv_t *priv)
+{
+        int                  i   = 0;
+        int                  ret = -1;
+        struct timeval       tv  = {0,};
+        changelog_log_data_t cld = {0,};
+
+        ret = gettimeofday (&tv, NULL);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "gettimeofday() failure");
+                goto out;
+        }
+
+        priv->slice.tv_start = tv;
+
+        priv->maps[CHANGELOG_TYPE_DATA]     = "D ";
+        priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
+        priv->maps[CHANGELOG_TYPE_ENTRY]    = "E ";
+
+        for (; i < CHANGELOG_MAX_TYPE; i++) {
+                /* start with version 1 */
+                priv->slice.changelog_version[i] = 1;
+        }
+
+        /* ret is 0 here: an inactive changelog is not an error */
+        if (!priv->active)
+                goto out;
+
+        /* spawn the notifier thread */
+        ret = changelog_spawn_notifier (this, priv);
+        if (ret)
+                goto out;
+
+        /**
+         * start with a fresh changelog file every time. this is done
+         * in case there was an encoding change. so... things are kept
+         * simple here.
+         */
+        ret = changelog_fill_rollover_data (&cld, _gf_false);
+        if (ret)
+                goto out;
+
+        LOCK (&priv->lock);
+        {
+                ret = changelog_inject_single_event (this, priv, &cld);
+        }
+        UNLOCK (&priv->lock);
+
+        /* do not start the helpers when the initial rollover failed */
+        if (ret)
+                goto out;
+
+        /* ... and finally spawn the helpers threads */
+        ret = changelog_spawn_helper_threads (this, priv);
+
+ out:
+        /* no-op unless the notifier thread was started above */
+        if (ret)
+                (void) changelog_cleanup_notifier (this, priv);
+
+        return ret;
+}
+
+/**
+ * Apply a new set of options to a running changelog translator.
+ *
+ * The helper threads are stopped first. The changelog directory is then
+ * replaced and created, and the "changelog", "op-mode", "encoding",
+ * "rollover-time" and "fsync-interval" options are re-read. If logging
+ * is or was active a rollover event is injected; depending on the new
+ * state the notifier and helper threads are (re)started or the notifier
+ * is stopped.
+ *
+ * A NULL this->private is tolerated (nothing is done, 0 is returned).
+ * On failure the notifier is torn down on a best-effort basis and the
+ * original failure code is returned rather than the teardown result.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        int                     ret            = 0;
+        char                   *tmp            = NULL;
+        changelog_priv_t       *priv           = NULL;
+        gf_boolean_t            active_earlier = _gf_true;
+        gf_boolean_t            active_now     = _gf_true;
+        changelog_time_slice_t *slice          = NULL;
+        changelog_log_data_t    cld            = {0,};
+
+        priv = this->private;
+        if (!priv)
+                goto out;
+
+        ret = -1;
+        active_earlier = priv->active;
+
+        /* first stop the rollover and the fsync thread */
+        changelog_cleanup_helper_threads (this, priv);
+
+        GF_OPTION_RECONF ("changelog-dir", tmp, options, str, out);
+        if (!tmp) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "\"changelog-dir\" option is not set");
+                goto out;
+        }
+
+        GF_FREE (priv->changelog_dir);
+        priv->changelog_dir = gf_strdup (tmp);
+        if (!priv->changelog_dir)
+                goto out;
+
+        ret = mkdir_p (priv->changelog_dir, 0600, _gf_true);
+        if (ret)
+                goto out;
+
+        GF_OPTION_RECONF ("changelog", active_now, options, bool, out);
+
+        /**
+         * changelog_handle_change() handles changes that could have
+         * been submitted before changelog deactivation.
+         */
+        if (!active_now)
+                priv->active = _gf_false;
+
+        GF_OPTION_RECONF ("op-mode", tmp, options, str, out);
+        changelog_assign_opmode (priv, tmp);
+
+        tmp = NULL;
+
+        GF_OPTION_RECONF ("encoding", tmp, options, str, out);
+        changelog_assign_encoding (priv, tmp);
+
+        GF_OPTION_RECONF ("rollover-time",
+                          priv->rollover_time, options, int32, out);
+        GF_OPTION_RECONF ("fsync-interval",
+                          priv->fsync_interval, options, int32, out);
+
+        if (active_now || active_earlier) {
+                ret = changelog_fill_rollover_data (&cld, !active_now);
+                if (ret)
+                        goto out;
+
+                slice = &priv->slice;
+
+                LOCK (&priv->lock);
+                {
+                        ret = changelog_inject_single_event (this, priv, &cld);
+                        if (!ret && active_now)
+                                SLICE_VERSION_UPDATE (slice);
+                }
+                UNLOCK (&priv->lock);
+
+                if (ret)
+                        goto out;
+
+                if (active_now) {
+                        ret = changelog_spawn_notifier (this, priv);
+                        if (!ret)
+                                ret = changelog_spawn_helper_threads (this,
+                                                                      priv);
+                } else
+                        ret = changelog_cleanup_notifier (this, priv);
+        }
+
+ out:
+        if (ret) {
+                /* best-effort teardown; keep reporting the real failure.
+                 * ret is only non-zero once priv is known to be set. */
+                (void) changelog_cleanup_notifier (this, priv);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "changelog reconfigured");
+                /* priv is NULL when we bailed out at the very top */
+                if (active_now && priv)
+                        priv->active = _gf_true;
+        }
+
+        return ret;
+}
+
+/**
+ * Translator entry point: validate the graph position, allocate and
+ * fill the private structure from the volume options, bootstrap the
+ * dispatcher for the selected operation mode and start change-logging
+ * through changelog_init().
+ *
+ * On success this->private points to the new private structure. On
+ * failure everything allocated here is released, this->private is set
+ * to NULL and the original failure code is returned. The error path
+ * copes with a NULL @this and with a private structure that was never
+ * allocated, and the destructor's result no longer replaces the
+ * failure code.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int               ret  = -1;
+        char             *tmp  = NULL;
+        changelog_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("changelog", this, out);
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator needs a single subvolume");
+                goto out;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "dangling volume. please check volfile");
+                goto out;
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t);
+        if (!priv)
+                goto out;
+
+        this->local_pool = mem_pool_new (changelog_local_t, 64);
+        if (!this->local_pool) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create local memory pool");
+                goto out;
+        }
+
+        LOCK_INIT (&priv->lock);
+
+        GF_OPTION_INIT ("changelog-brick", tmp, str, out);
+        if (!tmp) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "\"changelog-brick\" option is not set");
+                goto out;
+        }
+
+        priv->changelog_brick = gf_strdup (tmp);
+        if (!priv->changelog_brick)
+                goto out;
+        tmp = NULL;
+
+        GF_OPTION_INIT ("changelog-dir", tmp, str, out);
+        if (!tmp) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "\"changelog-dir\" option is not set");
+                goto out;
+        }
+
+        priv->changelog_dir = gf_strdup (tmp);
+        if (!priv->changelog_dir)
+                goto out;
+        tmp = NULL;
+
+        /**
+         * create the directory even if change-logging would be inactive
+         * so that consumers can _look_ into it (finding nothing...)
+         */
+        ret = mkdir_p (priv->changelog_dir, 0600, _gf_true);
+        if (ret)
+                goto out;
+
+        GF_OPTION_INIT ("changelog", priv->active, bool, out);
+
+        GF_OPTION_INIT ("op-mode", tmp, str, out);
+        changelog_assign_opmode (priv, tmp);
+
+        tmp = NULL;
+
+        GF_OPTION_INIT ("encoding", tmp, str, out);
+        changelog_assign_encoding (priv, tmp);
+
+        GF_OPTION_INIT ("rollover-time", priv->rollover_time, int32, out);
+
+        GF_OPTION_INIT ("fsync-interval", priv->fsync_interval, int32, out);
+
+        changelog_encode_change(priv);
+
+        GF_ASSERT (cb_bootstrap[priv->op_mode].mode == priv->op_mode);
+        priv->cb = &cb_bootstrap[priv->op_mode];
+
+        /* ... now bootstrap the logger */
+        ret = priv->cb->ctor (this, &priv->cd);
+        if (ret)
+                goto out;
+
+        priv->changelog_fd = -1;
+        ret = changelog_init (this, priv);
+        if (ret)
+                goto out;
+
+        gf_log (this->name, GF_LOG_DEBUG, "changelog translator loaded");
+
+ out:
+        if (ret) {
+                /* this is NULL when GF_VALIDATE_OR_GOTO bailed out */
+                if (this && this->local_pool)
+                        mem_pool_destroy (this->local_pool);
+
+                /* priv is NULL for every failure before/at GF_CALLOC */
+                if (priv) {
+                        /* do not let the dtor result hide the failure */
+                        if (priv->cb && priv->cb->dtor (this, &priv->cd))
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "error in cleanup during init()");
+                        GF_FREE (priv->changelog_brick);
+                        GF_FREE (priv->changelog_dir);
+                        GF_FREE (priv);
+                }
+
+                if (this)
+                        this->private = NULL;
+        } else
+                this->private = priv;
+
+        return ret;
+}
+
+/**
+ * Translator exit point.
+ *
+ * When a private structure exists, runs the mode destructor (logging an
+ * error if it reports failure), destroys the local memory pool and
+ * frees the brick path, the changelog directory and the private
+ * structure itself. this->private is always reset to NULL.
+ */
+void
+fini (xlator_t *this)
+{
+        changelog_priv_t *priv = this->private;
+
+        if (priv != NULL) {
+                if (priv->cb->dtor (this, &priv->cd) != 0)
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "error in fini");
+
+                mem_pool_destroy (this->local_pool);
+                GF_FREE (priv->changelog_brick);
+                GF_FREE (priv->changelog_dir);
+                GF_FREE (priv);
+        }
+
+        this->private = NULL;
+}
+/* File operations intercepted by this translator: the entry, metadata
+ * and data modifying fops implemented above. */
+struct xlator_fops fops = {
+ .mknod = changelog_mknod,
+ .mkdir = changelog_mkdir,
+ .create = changelog_create,
+ .symlink = changelog_symlink,
+ .writev = changelog_writev,
+ .truncate = changelog_truncate,
+ .ftruncate = changelog_ftruncate,
+ .link = changelog_link,
+ .rename = changelog_rename,
+ .unlink = changelog_unlink,
+ .rmdir = changelog_rmdir,
+ .setattr = changelog_setattr,
+ .fsetattr = changelog_fsetattr,
+ .setxattr = changelog_setxattr,
+ .fsetxattr = changelog_fsetxattr,
+ .removexattr = changelog_removexattr,
+ .fremovexattr = changelog_fremovexattr,
+};
+/* Callbacks: only forget is hooked, via changelog_forget (defined
+ * outside this file). */
+struct xlator_cbks cbks = {
+ .forget = changelog_forget,
+};
+/* Volume options understood by this translator. init() reads all of
+ * them; reconfigure() re-reads all except "changelog-brick". The table
+ * is terminated by an entry whose key is NULL. */
+struct volume_options options[] = {
+ {.key = {"changelog"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable change-logging"
+ },
+ {.key = {"changelog-brick"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "brick path to generate unique socket file name."
+ " should be the export directory of the volume strictly."
+ },
+ {.key = {"changelog-dir"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "directory for the changelog files"
+ },
+ {.key = {"op-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "realtime",
+ .value = {"realtime"},
+ .description = "operation mode - futuristic operation modes"
+ },
+ {.key = {"encoding"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "ascii",
+ .value = {"binary", "ascii"},
+ .description = "encoding type for changelogs"
+ },
+ {.key = {"rollover-time"},
+ .default_value = "60",
+ .type = GF_OPTION_TYPE_TIME,
+ .description = "time to switch to a new changelog file (in seconds)"
+ },
+ {.key = {"fsync-interval"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = "0",
+ .description = "do not open CHANGELOG file with O_SYNC mode."
+ " instead perform fsync() at specified intervals"
+ },
+ {.key = {NULL}
+ },
+};
diff --git a/xlators/protocol/legacy/server/Makefile.am b/xlators/features/compress/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/xlators/protocol/legacy/server/Makefile.am
+++ b/xlators/features/compress/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/compress/src/Makefile.am b/xlators/features/compress/src/Makefile.am
new file mode 100644
index 000000000..4a64b52a9
--- /dev/null
+++ b/xlators/features/compress/src/Makefile.am
@@ -0,0 +1,17 @@
+# Build rules for the compress (cdc) translator module.
+xlator_LTLIBRARIES = cdc.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = cdc.h cdc-mem-types.h
+
+# libtool spells this flag "-avoid-version"; "-avoidversion" is not recognised.
+cdc_la_LDFLAGS = -module -avoid-version $(LIBZ_LIBS)
+
+cdc_la_SOURCES = cdc.c cdc-helper.c
+cdc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
+-shared -nostartfiles $(LIBZ_CFLAGS)
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/compress/src/cdc-helper.c b/xlators/features/compress/src/cdc-helper.c
new file mode 100644
index 000000000..54432ff45
--- /dev/null
+++ b/xlators/features/compress/src/cdc-helper.c
@@ -0,0 +1,547 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#ifdef HAVE_LIB_Z
+/* The fixed 10-byte gzip member header looks like this
+ * (gzip file format, RFC 1952):
+ *
+ * +---+---+---+---+---+---+---+---+---+---+
+ * |ID1|ID2|CM |FLG| MTIME |XFL|OS |
+ * +---+---+---+---+---+---+---+---+---+---+
+ *
+ * Data is usually sent without this header, i.e.
+ * Data sent = <compressed-data> + trailer(8)
+ * The trailer carries the CRC and the length of the uncompressed
+ * data (see cdc_init_gzip_trailer).
+ *
+ * gzip_header is only written out when debugging.
+ * Refer to the function cdc_dump_iovec_to_disk.
+ */
+static const char gzip_header[10] =
+ {
+ '\037', '\213', Z_DEFLATED, 0,
+ 0, 0, 0, 0,
+ 0, GF_CDC_OS_ID
+ };
+
+/*
+ * Advance ci->ncount so that CURR_VEC(ci) refers to the next output iovec.
+ *
+ * Returns 0 when another slot is available, -1 when the output iovec
+ * array is exhausted.  The counter stays incremented on failure, exactly
+ * as before.
+ */
+static int32_t
+cdc_next_iovec (xlator_t *this, cdc_info_t *ci)
+{
+        ci->ncount++;
+
+        /* check for iovec overflow -- should not happen.
+         *
+         * The comparison is ">=" rather than "==": a caller that keeps
+         * going after one failure (do_cdc_decompress falls through to the
+         * flush step) would otherwise step past MAX_IOVEC unnoticed and
+         * make CURR_VEC(ci) index outside ci->vec[]. */
+        if (ci->ncount >= MAX_IOVEC) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Zlib output buffer overflow"
+                        " ->ncount (%d) | ->MAX_IOVEC (%d)",
+                        ci->ncount, MAX_IOVEC);
+                return -1;
+        }
+
+        return 0;
+}
+
+/* Store the low 32 bits of x into string[0..3], least significant
+ * byte first. */
+static void
+cdc_put_long (unsigned char *string, unsigned long x)
+{
+        int idx = 0;
+
+        for (idx = 0; idx < 4; idx++)
+                string[idx] = (unsigned char) ((x >> (8 * idx)) & 0xff);
+}
+
+/* Assemble a 32-bit value from buf[0..3], where buf[0] is the least
+ * significant byte (inverse of cdc_put_long). */
+static unsigned long
+cdc_get_long (unsigned char *buf)
+{
+        unsigned long value = 0;
+        int           idx   = 0;
+
+        for (idx = 3; idx >= 0; idx--)
+                value = (value << 8) | (unsigned long) buf[idx];
+
+        return value;
+}
+
+/*
+ * Append the 8-byte validation trailer as a new output iovec:
+ * bytes 0-3 hold ci->crc, bytes 4-7 hold ci->stream.total_in, both
+ * written least significant byte first by cdc_put_long.
+ *
+ * Returns 0 on success, non-zero when no iovec slot is left or the
+ * trailer buffer cannot be allocated.
+ *
+ * NOTE(review): no matching GF_FREE for the trailer buffer is visible in
+ * this file -- confirm who releases it.
+ */
+static int32_t
+cdc_init_gzip_trailer (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+        int   ret = -1;
+        char *buf = NULL;
+
+        ret = cdc_next_iovec (this, ci);
+        if (ret)
+                goto out;
+
+        /* cdc_next_iovec() left ret at 0; re-arm it so that an allocation
+         * failure below is not reported as success */
+        ret = -1;
+
+        buf = GF_CALLOC (1, GF_CDC_VALIDATION_SIZE,
+                         gf_cdc_mt_gzip_trailer_t);
+        CURR_VEC(ci).iov_base = buf;
+        if (!buf)
+                goto out;
+
+        CURR_VEC(ci).iov_len = GF_CDC_VALIDATION_SIZE;
+
+        cdc_put_long ((unsigned char *)&buf[0], ci->crc);
+        cdc_put_long ((unsigned char *)&buf[4], ci->stream.total_in);
+
+        ret = 0;
+
+ out:
+        return ret;
+}
+
+/*
+ * Claim the next output iovec and back it with a freshly obtained iobuf
+ * that is also added to ci->iobref.
+ *
+ * size: number of bytes to request; 0 means ci->buffer_size.
+ *
+ * Returns 0 on success, non-zero if no iovec slot is left, the iobuf
+ * cannot be obtained or it cannot be added to the iobref.
+ */
+static int32_t
+cdc_alloc_iobuf_and_init_vec (xlator_t *this,
+                              cdc_priv_t *priv, cdc_info_t *ci,
+                              int size)
+{
+        int           ret       = -1;
+        int           alloc_len = 0;
+        struct iobuf *iobuf     = NULL;
+
+        ret = cdc_next_iovec (this, ci);
+        if (ret)
+                goto out;
+
+        /* cdc_next_iovec() left ret at 0; re-arm it so that a failed
+         * iobuf_get2() is not reported as success */
+        ret = -1;
+
+        alloc_len = size ? size : ci->buffer_size;
+
+        iobuf = iobuf_get2 (this->ctx->iobuf_pool, alloc_len);
+        if (!iobuf)
+                goto out;
+
+        ret = iobref_add (ci->iobref, iobuf);
+        if (ret) {
+                /* the iobref did not take the buffer, so nothing else
+                 * refers to it: give it back instead of leaking it */
+                iobuf_unref (iobuf);
+                goto out;
+        }
+
+        /* Initialize this iovec */
+        CURR_VEC(ci).iov_base = iobuf->ptr;
+        CURR_VEC(ci).iov_len = alloc_len;
+
+        ret = 0;
+
+ out:
+        return ret;
+}
+
+/* Point the zlib output cursor at the current output iovec.  The room
+ * advertised to zlib is size, or ci->buffer_size when size is 0. */
+static void
+cdc_init_zlib_output_stream (cdc_priv_t *priv, cdc_info_t *ci, int size)
+{
+        int room = ci->buffer_size;
+
+        if (size)
+                room = size;
+
+        ci->stream.avail_out = room;
+        ci->stream.next_out  = (unsigned char *) CURR_VEC(ci).iov_base;
+}
+
+/* This routine is for testing and debugging only.
+ * Data written = header(10) + <compressed-data> + trailer(8)
+ * So each gzip dump file is at least 18 bytes in size.
+ *
+ * file: path of the dump file; it is created or truncated.
+ * A failed write is logged and aborts the dump.
+ *
+ * NOTE(review): the file is created with mode 0777 and the only caller
+ * in this file passes a fixed path under /tmp -- confirm that this is
+ * acceptable wherever the debug option may be enabled.
+ */
+void
+cdc_dump_iovec_to_disk (xlator_t *this, cdc_info_t *ci, const char *file)
+{
+        int     i             = 0;
+        int     fd            = -1;
+        ssize_t written       = 0;
+        size_t  total_written = 0;
+
+        fd = open (file, O_WRONLY|O_CREAT|O_TRUNC, 0777 );
+        if (fd < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Cannot open file: %s", file);
+                return;
+        }
+
+        /* write() returns -1 on error; keep the result signed so that a
+         * failure is detected instead of being added to the byte count */
+        written = write (fd, gzip_header, sizeof (gzip_header));
+        if (written < 0)
+                goto write_failed;
+        total_written += written;
+
+        for (i = 0; i < ci->ncount; i++) {
+                written = write (fd, ci->vec[i].iov_base,
+                                 ci->vec[i].iov_len);
+                if (written < 0)
+                        goto write_failed;
+                total_written += written;
+        }
+
+        /* report the file that was actually written */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "dump'd %zu bytes to %s", total_written, file);
+
+        close (fd);
+        return;
+
+ write_failed:
+        gf_log (this->name, GF_LOG_ERROR,
+                "Cannot write to file: %s", file);
+        close (fd);
+}
+
+/*
+ * Drain the data zlib still holds by calling libz_func (the callers in
+ * this file pass deflate or inflate) with the given flush mode until it
+ * reports that it is finished.
+ *
+ * Whenever the current output buffer has received data, its iovec length
+ * is fixed up and a fresh buffer is chained in for the next call.
+ *
+ * Returns the last zlib status: Z_OK or Z_STREAM_END on success,
+ * Z_MEM_ERROR when a new output buffer could not be set up, or whatever
+ * other error libz_func returned.
+ */
+static int32_t
+cdc_flush_libz_buffer (cdc_priv_t *priv, xlator_t *this, cdc_info_t *ci,
+ int (*libz_func)(z_streamp, int),
+ int flush)
+{
+ int32_t ret = Z_OK;
+ int done = 0;
+ unsigned int deflate_len = 0;
+
+ for (;;) {
+ /* bytes zlib has written into the current output buffer */
+ deflate_len = ci->buffer_size - ci->stream.avail_out;
+
+ if (deflate_len != 0) {
+ /* close the current iovec at the amount actually produced ... */
+ CURR_VEC(ci).iov_len = deflate_len;
+
+ /* ... and chain in an empty buffer for further output */
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret) {
+ ret = Z_MEM_ERROR;
+ break;
+ }
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ if (done) {
+ /* the current iovec holds no data (either just allocated above
+ * or never written to), so drop it from the count */
+ ci->ncount--;
+ break;
+ }
+
+ ret = libz_func (&ci->stream, flush);
+
+ /* Z_BUF_ERROR is treated as "nothing left to do", not as a
+ * failure; the still-empty current iovec is dropped */
+ if (ret == Z_BUF_ERROR) {
+ ret = Z_OK;
+ ci->ncount--;
+ break;
+ }
+
+ /* finished once zlib left room in the buffer or hit end of stream;
+ * the next iteration records what was produced and then exits */
+ done = (ci->stream.avail_out != 0 || ret == Z_STREAM_END);
+
+ if (ret != Z_OK && ret != Z_STREAM_END)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Feed one input iovec through deflate.
+ *
+ * Initialises the deflate stream, sets up the first output buffer, folds
+ * the input into ci->crc and then deflates with Z_NO_FLUSH, chaining in
+ * a new output buffer every time the current one fills up.
+ *
+ * Returns Z_OK (0) once all input has been consumed, otherwise the
+ * failing zlib status or the non-zero result of the buffer allocation.
+ *
+ * NOTE(review): the stream is re-initialised on every call while
+ * cdc_compress calls this once per input iovec -- confirm the behaviour
+ * intended for requests carrying more than one iovec.
+ */
+static int32_t
+do_cdc_compress (struct iovec *vec, xlator_t *this, cdc_priv_t *priv,
+                 cdc_info_t *ci)
+{
+        int status = -1;
+
+        /* Initialize deflate */
+        status = deflateInit2 (&ci->stream, priv->cdc_level, Z_DEFLATED,
+                               priv->window_size, priv->mem_level,
+                               Z_DEFAULT_STRATEGY);
+        if (status) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unable to init Zlib (retval: %d)", status);
+                return status;
+        }
+
+        status = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+        if (status)
+                return status;
+
+        /* output side */
+        cdc_init_zlib_output_stream (priv, ci, 0);
+
+        /* input side */
+        ci->stream.next_in = (unsigned char *) vec->iov_base;
+        ci->stream.avail_in = vec->iov_len;
+
+        ci->crc = crc32 (ci->crc, (const Bytef *) vec->iov_base, vec->iov_len);
+
+        gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%d buffer_size=%d",
+                ci->crc, ci->stream.avail_in, ci->buffer_size);
+
+        /* deflate until the whole input has been consumed */
+        while (ci->stream.avail_in != 0) {
+                if (ci->stream.avail_out == 0) {
+                        /* current buffer is full: seal it, chain a new one */
+                        CURR_VEC(ci).iov_len = ci->buffer_size;
+
+                        status = cdc_alloc_iobuf_and_init_vec (this, priv,
+                                                               ci, 0);
+                        if (status)
+                                return status;
+
+                        cdc_init_zlib_output_stream (priv, ci, 0);
+                }
+
+                status = deflate (&ci->stream, Z_NO_FLUSH);
+                if (status != Z_OK)
+                        return status;
+        }
+
+        return status;
+}
+
+/*
+ * Compress the input iovecs described by ci into ci->vec / ci->ncount.
+ *
+ * A new iobref is created for the output buffers, *xdata is created when
+ * the caller passed none, and on success the canary key is stored in
+ * *xdata so that the peer can recognise deflated content.  ci->nbytes is
+ * set to the compressed size plus the trailer.
+ *
+ * Returns 0 on success and non-zero on any failure, including a failure
+ * to store the canary.
+ */
+int32_t
+cdc_compress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+              dict_t **xdata)
+{
+        int status = -1;
+        int idx    = 0;
+
+        ci->iobref = iobref_new ();
+        if (!ci->iobref)
+                return status;
+
+        if (!*xdata) {
+                *xdata = dict_new ();
+                if (!*xdata) {
+                        gf_log (this->name, GF_LOG_ERROR, "Cannot allocate xdata"
+                                " dict");
+                        return status;
+                }
+        }
+
+        /* deflate every input iovec */
+        for (idx = 0; idx < ci->count; idx++) {
+                status = do_cdc_compress (&ci->vector[idx], this, priv, ci);
+                if (status != Z_OK)
+                        goto end_deflate;
+        }
+
+        /* push out whatever zlib still buffers */
+        status = cdc_flush_libz_buffer (priv, this, ci, deflate, Z_FINISH);
+        if (status != Z_OK && status != Z_STREAM_END) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Compression Error: ret (%d)", status);
+                status = -1;
+                goto end_deflate;
+        }
+
+        /* append CRC + length trailer */
+        status = cdc_init_gzip_trailer (this, priv, ci);
+        if (status)
+                goto end_deflate;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "Compressed %ld to %ld bytes",
+                ci->stream.total_in, ci->stream.total_out);
+
+        ci->nbytes = ci->stream.total_out + GF_CDC_VALIDATION_SIZE;
+
+        /* Mark the payload as deflated.  If the canary cannot be stored
+         * the peer has no way of knowing, so the failure is logged and the
+         * non-zero status is handed back to the caller. */
+        status = dict_set_int32 (*xdata, GF_CDC_DEFLATE_CANARY_VAL, 1);
+        if (status) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Data deflated, but could not set canary"
+                        " value in dict for identification");
+        }
+
+        /* testing aid: keep a copy of the output on disk */
+        if (priv->debug)
+                cdc_dump_iovec_to_disk (this, ci, GF_CDC_DEBUG_DUMP_FILE);
+
+ end_deflate:
+        (void) deflateEnd (&ci->stream);
+
+        return status;
+}
+
+
+/* Deflated content is recognised by the canary key being present in
+ * xdata: returns -1 when the key is found, 0 when it is not. */
+static int32_t
+cdc_check_content_for_deflate (dict_t *xdata)
+{
+        if (dict_get (xdata, GF_CDC_DEFLATE_CANARY_VAL))
+                return -1;
+
+        return 0;
+}
+
+/* The CRC occupies the first four bytes of the trailer. */
+static unsigned long
+cdc_extract_crc (char *trailer)
+{
+        unsigned char *crc_field = (unsigned char *) trailer;
+
+        return cdc_get_long (crc_field);
+}
+
+/* The length field occupies bytes 4-7 of the trailer. */
+static unsigned long
+cdc_extract_size (char *trailer)
+{
+        unsigned char *len_field = (unsigned char *) (trailer + 4);
+
+        return cdc_get_long (len_field);
+}
+
+/* Compare the CRC and length taken from the trailer with what the
+ * inflate run produced (ci->crc and the stream's total_out).
+ * Returns 0 when both match, 1 otherwise. */
+static int32_t
+cdc_validate_inflate (cdc_info_t *ci, unsigned long crc,
+                      unsigned long len)
+{
+        if (crc != ci->crc)
+                return 1;
+
+        /* inflated length is kept inside the zlib stream struct */
+        if (len != ci->stream.total_out)
+                return 1;
+
+        return 0;
+}
+
+/*
+ * Inflate the single input iovec of ci into ci->vec / ci->ncount and
+ * validate the result against the 8-byte trailer (CRC + length of the
+ * uncompressed data) that follows the deflated bytes.
+ *
+ * Returns 0 when the data was inflated and validated, non-zero when the
+ * payload is too short, zlib reports an error, an output buffer cannot
+ * be set up, or the checksum/length do not match.
+ */
+static int32_t
+do_cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+        int            ret          = -1;
+        int            i            = 0;
+        int            len          = 0;
+        char          *inflte       = NULL;
+        char          *trailer      = NULL;
+        struct iovec   vec          = {0,};
+        unsigned long  expected_crc = 0;
+        unsigned long  expected_len = 0;
+
+        ret = inflateInit2 (&ci->stream, priv->window_size);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Zlib: Unable to initialize inflate");
+                goto out;
+        }
+
+        vec = THIS_VEC(ci, 0);
+
+        /* The payload arrives from the peer: make sure it can hold the
+         * trailer at all before computing pointers and lengths from it,
+         * otherwise the trailer would be read from before the buffer and
+         * the input length would go negative. */
+        if (!vec.iov_base || vec.iov_len < GF_CDC_VALIDATION_SIZE) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Deflated payload (%zu bytes) is too short to hold"
+                        " the %d byte trailer", vec.iov_len,
+                        GF_CDC_VALIDATION_SIZE);
+                ret = -1;
+                goto out;
+        }
+
+        trailer = (char *) (((char *) vec.iov_base) + vec.iov_len
+                            - GF_CDC_VALIDATION_SIZE);
+
+        /* CRC of uncompressed data, as recorded by the sender */
+        expected_crc = cdc_extract_crc (trailer);
+
+        /* size of uncompressed data, as recorded by the sender */
+        expected_len = cdc_extract_size (trailer);
+
+        gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%lu buffer_size=%d",
+                expected_crc, expected_len, ci->buffer_size);
+
+        inflte = vec.iov_base ;
+        len = vec.iov_len - GF_CDC_VALIDATION_SIZE;
+
+        /* first output buffer (ci->buffer_size bytes) */
+        ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+        if (ret)
+                goto out;
+
+        /* setup output buffer */
+        cdc_init_zlib_output_stream (priv, ci, 0);
+
+        /* setup input buffer */
+        ci->stream.next_in = (unsigned char *) inflte;
+        ci->stream.avail_in = len;
+
+        while (ci->stream.avail_in != 0) {
+                if (ci->stream.avail_out == 0) {
+                        CURR_VEC(ci).iov_len = ci->buffer_size;
+
+                        /* bail out right away: falling through to the
+                         * flush step would hide this failure */
+                        ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+                        if (ret) {
+                                ret = -1;
+                                goto out;
+                        }
+
+                        /* Re-position Zlib output buffer */
+                        cdc_init_zlib_output_stream (priv, ci, 0);
+                }
+
+                /* Anything other than Z_OK ends the loop.  Z_STREAM_END
+                 * is the regular end of data; error codes such as
+                 * Z_DATA_ERROR consume no further input, so carrying on
+                 * would spin forever on avail_in. */
+                ret = inflate (&ci->stream, Z_NO_FLUSH);
+                if (ret != Z_OK)
+                        break;
+        }
+
+        if (ret != Z_OK && ret != Z_STREAM_END) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Decompression Error: ret (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+
+        /* flush zlib buffer */
+        ret = cdc_flush_libz_buffer (priv, this, ci, inflate, Z_SYNC_FLUSH);
+        if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Decompression Error: ret (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+
+        /* compute CRC of the uncompressed data to check for
+         * correctness */
+        for (i = 0; i < ci->ncount; i++) {
+                ci->crc = crc32 (ci->crc,
+                                 (const Bytef *) ci->vec[i].iov_base,
+                                 ci->vec[i].iov_len);
+        }
+
+        /* validate inflated data */
+        ret = cdc_validate_inflate (ci, expected_crc, expected_len);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Checksum or length mismatched in inflated data");
+        }
+
+ out:
+        return ret;
+}
+
+/*
+ * Inflate the payload described by ci if xdata carries the deflate
+ * canary.
+ *
+ * Returns 0 when the data was inflated (ci->vec, ci->ncount and
+ * ci->nbytes then describe the result) and non-zero when the content is
+ * not deflated, cannot be handled, or fails to inflate.
+ */
+int32_t
+cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+                dict_t *xdata)
+{
+        int32_t status = -1;
+
+        /* no canary, nothing to inflate */
+        if (!cdc_check_content_for_deflate (xdata)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Content not deflated, passing through ...");
+                return status;
+        }
+
+        ci->iobref = iobref_new ();
+        if (!ci->iobref)
+                return status;
+
+        /* Only a single input iovec is supported.
+         *
+         * The server/client protocol appears to hand over one iovec even
+         * for large replies, which spares us from locating a trailer
+         * that might be split across two adjacent iovecs.  If this
+         * translator is loaded above quick-read, several iovecs are
+         * possible; that case is untested and rejected here.
+         *
+         * TODO: collate all iovecs into one.
+         */
+
+        /* @@ I_HOPE_THIS_IS_NEVER_HIT */
+        if (ci->count > 1) {
+                gf_log (this->name, GF_LOG_WARNING, "unable to handle"
+                        " multiple iovecs (%d in number)", ci->count);
+        } else {
+                status = do_cdc_decompress (this, priv, ci);
+                if (!status) {
+                        ci->nbytes = ci->stream.total_out;
+
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "Inflated %ld to %ld bytes",
+                                ci->stream.total_in, ci->stream.total_out);
+                }
+        }
+
+        (void) inflateEnd (&ci->stream);
+
+        return status;
+}
+
+#endif
diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h
new file mode 100644
index 000000000..efa008059
--- /dev/null
+++ b/xlators/features/compress/src/cdc-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_MEM_TYPES_H
+#define __CDC_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/* Memory accounting types of the cdc translator, numbered consecutively
+ * after the common types. */
+enum gf_cdc_mem_types {
+        gf_cdc_mt_priv_t = gf_common_mt_end + 1,
+        gf_cdc_mt_vec_t,
+        gf_cdc_mt_gzip_trailer_t,
+};
+
+#endif
diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c
new file mode 100644
index 000000000..eb7d87c56
--- /dev/null
+++ b/xlators/features/compress/src/cdc.c
@@ -0,0 +1,342 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+/* Release the output iobref of a finished (de)compression run via
+ * iobref_clear.  The iobref must have been created. */
+static void
+cdc_cleanup_iobref (cdc_info_t *ci)
+{
+        struct iobref *held = NULL;
+
+        assert(ci->iobref != NULL);
+
+        held = ci->iobref;
+        iobref_clear (held);
+}
+
+/*
+ * readv callback: compresses the reply when loaded on the server and
+ * decompresses it when loaded on the client.
+ *
+ * Replies with op_ret <= 0, replies smaller than the configured minimum
+ * size, and replies whose (de)compression fails are unwound unchanged.
+ *
+ * NOTE(review): cdc_compress may create a new dict in the local xdata;
+ * no unref of it is visible in this function -- confirm ownership.
+ */
+int32_t
+cdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               struct iovec *vector, int32_t count,
+               struct iatt *stbuf, struct iobref *iobref,
+               dict_t *xdata)
+{
+        int         ret  = -1;
+        cdc_priv_t *priv = NULL;
+        cdc_info_t  ci   = {0,};
+
+        GF_VALIDATE_OR_GOTO ("cdc", this, default_out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, default_out);
+
+        priv = this->private;
+
+        if (op_ret <= 0)
+                goto default_out;
+
+        if ( (priv->min_file_size != 0)
+             && (op_ret < priv->min_file_size) )
+                goto default_out;
+
+        ci.count = count;
+        ci.ibytes = op_ret;
+        ci.vector = vector;
+        ci.buf = NULL;
+        ci.iobref = NULL;
+        ci.ncount = 0;
+        ci.crc = 0;
+        ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+        /* A readv compresses on the server side and decompresses on the
+         * client side */
+        if (priv->op_mode == GF_CDC_MODE_SERVER) {
+                ret = cdc_compress (this, priv, &ci, &xdata);
+        } else if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+                ret = cdc_decompress (this, priv, &ci, xdata);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Invalid operation mode (%d)", priv->op_mode);
+        }
+
+        if (ret)
+                goto default_out;
+
+        STACK_UNWIND_STRICT (readv, frame, ci.nbytes, op_errno,
+                             ci.vec, ci.ncount, stbuf, iobref,
+                             xdata);
+        cdc_cleanup_iobref (&ci);
+        return 0;
+
+ default_out:
+        /* a failed (de)compression may already have created ci.iobref and
+         * attached buffers to it; the original vector is what gets
+         * unwound, so release them here instead of leaking them */
+        if (ci.iobref)
+                cdc_cleanup_iobref (&ci);
+
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+                             vector, count, stbuf, iobref, xdata);
+        return 0;
+}
+
+/* readv fop: winds to the first child.  The reply goes through
+ * cdc_readv_cbk when zlib support is compiled in, otherwise through the
+ * default callback. */
+int32_t
+cdc_readv (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, size_t size, off_t offset, uint32_t flags,
+           dict_t *xdata)
+{
+#ifdef HAVE_LIB_Z
+        fop_readv_cbk_t cbk = cdc_readv_cbk;
+#else
+        fop_readv_cbk_t cbk = default_readv_cbk;
+#endif
+
+        STACK_WIND (frame, cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->readv,
+                    fd, size, offset, flags, xdata);
+
+        return 0;
+}
+
+/* writev callback: hands the child's reply back to the parent
+ * unchanged. */
+int32_t
+cdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+                             prebuf, postbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * writev fop: compresses the payload when loaded on the client and
+ * decompresses it when loaded on the server, then winds to the first
+ * child.
+ *
+ * Empty payloads, payloads smaller than the configured minimum size, and
+ * payloads whose (de)compression fails are wound unchanged.
+ *
+ * NOTE(review): the converted vector lives in buffers held by ci.iobref,
+ * which is released right after STACK_WIND while the caller's iobref is
+ * what gets passed down -- confirm the child never touches the vector
+ * after the wind returns.
+ */
+int32_t
+cdc_writev (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            struct iovec *vector,
+            int32_t count,
+            off_t offset,
+            uint32_t flags,
+            struct iobref *iobref, dict_t *xdata)
+{
+        int         ret   = -1;
+        cdc_priv_t *priv  = NULL;
+        cdc_info_t  ci    = {0,};
+        size_t      isize = 0;
+
+        GF_VALIDATE_OR_GOTO ("cdc", this, default_out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, default_out);
+
+        priv = this->private;
+
+        isize = iov_length(vector, count);
+
+        if (isize <= 0)
+                goto default_out;
+
+        if ( (priv->min_file_size != 0)
+             && (isize < priv->min_file_size) )
+                goto default_out;
+
+        ci.count = count;
+        ci.ibytes = isize;
+        ci.vector = vector;
+        ci.buf = NULL;
+        ci.iobref = NULL;
+        ci.ncount = 0;
+        ci.crc = 0;
+        ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+        /* A writev compresses on the client side and decompresses on the
+         * server side */
+        if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+                ret = cdc_compress (this, priv, &ci, &xdata);
+        } else if (priv->op_mode == GF_CDC_MODE_SERVER) {
+                ret = cdc_decompress (this, priv, &ci, xdata);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR, "Invalid operation mode (%d) ", priv->op_mode);
+        }
+
+        if (ret)
+                goto default_out;
+
+        STACK_WIND (frame,
+                    cdc_writev_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->writev,
+                    fd, ci.vec, ci.ncount, offset, flags,
+                    iobref, xdata);
+
+        cdc_cleanup_iobref (&ci);
+        return 0;
+
+ default_out:
+        /* a failed (de)compression may already have created ci.iobref and
+         * attached buffers to it; the original vector is what gets wound,
+         * so release them here instead of leaking them */
+        if (ci.iobref)
+                cdc_cleanup_iobref (&ci);
+
+        STACK_WIND (frame,
+                    cdc_writev_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->writev,
+                    fd, vector, count, offset, flags,
+                    iobref, xdata);
+        return 0;
+}
+
+/*
+ * Translator entry point: validates the graph position, reads the
+ * options into a freshly allocated cdc_priv_t and stores it in
+ * this->private.
+ *
+ * Out-of-range window size, compression level or memory level values
+ * are replaced by their defaults with a warning.  The "mode" option is
+ * mandatory and must be "client" or "server".
+ *
+ * Returns 0 on success, -1 on failure (the private structure is freed).
+ */
+int32_t
+init (xlator_t *this)
+{
+        int         ret      = -1;
+        char       *temp_str = NULL;
+        cdc_priv_t *priv     = NULL;
+
+        GF_VALIDATE_OR_GOTO ("cdc", this, err);
+
+        /* exactly one subvolume is required */
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Need subvolume == 1");
+                goto err;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Dangling volume. Check volfile");
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_cdc_mt_priv_t);
+        if (!priv) {
+                goto err;
+        }
+
+        /* Check if debug mode is turned on */
+        GF_OPTION_INIT ("debug", priv->debug, bool, err);
+        if( priv->debug ) {
+                gf_log (this->name, GF_LOG_DEBUG, "CDC debug option turned on");
+        }
+
+        /* Set Gzip Window Size */
+        GF_OPTION_INIT ("window-size", priv->window_size, int32, err);
+        if ( (priv->window_size > GF_CDC_MAX_WINDOWSIZE)
+             || (priv->window_size < GF_CDC_DEF_WINDOWSIZE) ) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Invalid gzip window size (%d), using default",
+                        priv->window_size);
+                priv->window_size = GF_CDC_DEF_WINDOWSIZE;
+        }
+
+        /* Set Gzip (De)Compression Level.
+         * Level 0 ("no compression") is advertised by the option's help
+         * text and accepted by zlib, so the valid range is 0..9 plus the
+         * default marker. */
+        GF_OPTION_INIT ("compression-level", priv->cdc_level, int32, err);
+        if ( ((priv->cdc_level < 0) || (priv->cdc_level > 9))
+             && (priv->cdc_level != GF_CDC_DEF_COMPRESSION) ) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Invalid gzip (de)compression level (%d),"
+                        " using default", priv->cdc_level);
+                priv->cdc_level = GF_CDC_DEF_COMPRESSION;
+        }
+
+        /* Set Gzip Memory Level */
+        GF_OPTION_INIT ("mem-level", priv->mem_level, int32, err);
+        if ( (priv->mem_level < 1) || (priv->mem_level > 9) ) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Invalid gzip memory level, using the default");
+                priv->mem_level = GF_CDC_DEF_MEMLEVEL;
+        }
+
+        /* Set min file size to enable compression */
+        GF_OPTION_INIT ("min-size", priv->min_file_size, int32, err);
+
+        /* Mode of operation - Server/Client */
+        ret = dict_get_str (this->options, "mode", &temp_str);
+        if (ret) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Operation mode not specified !!");
+                goto err;
+        }
+
+        if (GF_CDC_MODE_IS_CLIENT (temp_str)) {
+                priv->op_mode = GF_CDC_MODE_CLIENT;
+        } else if (GF_CDC_MODE_IS_SERVER (temp_str)) {
+                priv->op_mode = GF_CDC_MODE_SERVER;
+        } else {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Bogus operation mode (%s) specified", temp_str);
+                goto err;
+        }
+
+        this->private = priv;
+        gf_log (this->name, GF_LOG_DEBUG, "CDC xlator loaded in (%s) mode",temp_str);
+        return 0;
+
+ err:
+        if (priv)
+                GF_FREE (priv);
+
+        return -1;
+}
+
+/* Translator teardown: frees the private structure, if any, and clears
+ * this->private. */
+void
+fini (xlator_t *this)
+{
+        if (this->private)
+                GF_FREE (this->private);
+
+        this->private = NULL;
+}
+
+/* File operations implemented by this translator: only readv and writev
+ * are set here. */
+struct xlator_fops fops = {
+ .readv = cdc_readv,
+ .writev = cdc_writev,
+};
+
+/* Callback table; this translator sets no callbacks. */
+struct xlator_cbks cbks = {
+};
+
+/* Option table of the cdc translator, terminated by the entry whose key
+ * is NULL.
+ *
+ * The help texts are built from adjacent string literals; a
+ * backslash-newline inside a literal would embed the indentation of the
+ * following source line in the text. */
+struct volume_options options[] = {
+        { .key = {"window-size"},
+          .default_value = "-15",
+          .type = GF_OPTION_TYPE_INT,
+          .description = "Size of the zlib history buffer."
+        },
+        { .key = {"mem-level"},
+          .default_value = "8",
+          .type = GF_OPTION_TYPE_INT,
+          .description = "Memory allocated for internal compression state. "
+                         "1 uses minimum memory but is slow and reduces "
+                         "compression ratio; memLevel=9 uses maximum memory "
+                         "for optimal speed. The default value is 8."
+        },
+        { .key = {"compression-level"},
+          .default_value = "-1",
+          .type = GF_OPTION_TYPE_INT,
+          .description = "Compression levels "
+                         "0 : no compression, 1 : best speed, "
+                         "9 : best compression, -1 : default compression "
+        },
+        { .key = {"min-size"},
+          .default_value = "0",
+          .type = GF_OPTION_TYPE_INT,
+          .description = "Data is compressed only when its size exceeds this."
+        },
+        /* no default: init() fails when this option is missing */
+        { .key = {"mode"},
+          .value = {"server", "client"},
+          .type = GF_OPTION_TYPE_STR,
+          .description = "Set on the basis of where the xlator is loaded."
+        },
+        { .key = {"debug"},
+          .default_value = "false",
+          .type = GF_OPTION_TYPE_BOOL,
+          .description = "This is used in testing. Will dump compressed data "
+                         "to disk as a gzip file."
+        },
+        { .key = {NULL}
+        },
+};
diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h
new file mode 100644
index 000000000..71f4d2317
--- /dev/null
+++ b/xlators/features/compress/src/cdc.h
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_H
+#define __CDC_H
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#include "xlator.h"
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif
+
+/* Per-translator configuration, filled in by init() from the volume
+ * options. */
+typedef struct cdc_priv {
+ int window_size; /* "window-size" option; passed to deflateInit2/inflateInit2 */
+ int mem_level; /* "mem-level" option; passed to deflateInit2 */
+ int cdc_level; /* "compression-level" option; passed to deflateInit2 */
+ int min_file_size; /* "min-size" option; smaller payloads are passed through */
+ int op_mode; /* GF_CDC_MODE_CLIENT or GF_CDC_MODE_SERVER */
+ gf_boolean_t debug; /* "debug" option; dump compressed output to disk */
+ gf_lock_t lock; /* NOTE(review): not referenced by the cdc sources shown here */
+} cdc_priv_t;
+
+/* State of one compression or decompression run; callers zero it and
+ * fill in the input fields before calling cdc_compress/cdc_decompress. */
+typedef struct cdc_info {
+ /* input bits */
+ int count; /* number of entries in vector */
+ int32_t ibytes; /* input size in bytes, as set by the fop handlers */
+ struct iovec *vector; /* input iovecs */
+ struct iatt *buf; /* set to NULL by the fop handlers */
+
+ /* output bits */
+ int ncount; /* number of output iovecs in use; CURR_VEC is vec[ncount - 1] */
+ int nbytes; /* size of the produced output in bytes */
+ int buffer_size; /* size of each output buffer handed to zlib */
+ struct iovec vec[MAX_IOVEC]; /* output iovecs */
+ struct iobref *iobref; /* holds the iobufs backing vec */
+
+ /* zlib bits */
+#ifdef HAVE_LIB_Z
+ z_stream stream; /* zlib stream state */
+#endif
+ unsigned long crc; /* running crc32 of the uncompressed data */
+} cdc_info_t;
+
+/* Accessors for a cdc_info_t pointer.  Arguments and expansions are
+ * parenthesised so that expressions such as CURR_VEC(&info) expand
+ * correctly. */
+#define NVEC(ci) ((ci)->ncount - 1)               /* index of the current output iovec */
+#define CURR_VEC(ci) ((ci)->vec[(ci)->ncount - 1]) /* current output iovec (lvalue) */
+#define THIS_VEC(ci, i) ((ci)->vector[(i)])        /* i-th input iovec */
+
+/* Gzip defaults */
+#define GF_CDC_DEF_WINDOWSIZE (-15) /* default value */
+#define GF_CDC_MAX_WINDOWSIZE (-8) /* max value */
+
+#ifdef HAVE_LIB_Z
+#define GF_CDC_DEF_COMPRESSION Z_DEFAULT_COMPRESSION
+#else
+#define GF_CDC_DEF_COMPRESSION (-1)
+#endif
+
+#define GF_CDC_DEF_MEMLEVEL 8
+#define GF_CDC_DEF_BUFFERSIZE 262144 // 256K - default compression buffer size
+
+/* Operation mode
+ * If xlator is loaded on client, readv decompresses and writev compresses
+ * If xlator is loaded on server, readv compresses and writev decompresses
+ */
+#define GF_CDC_MODE_CLIENT 0
+#define GF_CDC_MODE_SERVER 1
+
+/* min size of data to do compression
+ * 0 == compress even 1 byte
+ */
+#define GF_CDC_MIN_CHUNK_SIZE 0
+
+/* size of the trailer (CRC + length) that follows the compressed data */
+#define GF_CDC_VALIDATION_SIZE 8
+
+#define GF_CDC_OS_ID 0xFF
+#define GF_CDC_DEFLATE_CANARY_VAL "deflate"
+#define GF_CDC_DEBUG_DUMP_FILE "/tmp/cdcdump.gz"
+
+#define GF_CDC_MODE_IS_CLIENT(m) \
+        (strcmp ((m), "client") == 0)
+
+#define GF_CDC_MODE_IS_SERVER(m) \
+        (strcmp ((m), "server") == 0)
+
+/* Compress the input iovecs of ci into ci->vec; *xdata is created when
+ * NULL and receives the deflate canary.  Returns 0 on success. */
+int32_t
+cdc_compress (xlator_t *this,
+ cdc_priv_t *priv,
+ cdc_info_t *ci,
+ dict_t **xdata);
+/* Inflate the input iovec of ci into ci->vec when xdata carries the
+ * deflate canary.  Returns 0 only when data was inflated. */
+int32_t
+cdc_decompress (xlator_t *this,
+ cdc_priv_t *priv,
+ cdc_info_t *ci,
+ dict_t *xdata);
+
+#endif
diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am
index d473b9ea1..d1fda8b0a 100644
--- a/xlators/features/filter/src/Makefile.am
+++ b/xlators/features/filter/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = filter.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-filter_la_LDFLAGS = -module -avoidversion
+filter_la_LDFLAGS = -module -avoid-version
filter_la_SOURCES = filter.c
filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = filter-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/filter/src/filter-mem-types.h b/xlators/features/filter/src/filter-mem-types.h
index de2cb9665..47a17249b 100644
--- a/xlators/features/filter/src/filter-mem-types.h
+++ b/xlators/features/filter/src/filter-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __FILTER_MEM_TYPES_H__
#define __FILTER_MEM_TYPES_H__
diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c
index eda042f35..1d4887b71 100644
--- a/xlators/features/filter/src/filter.c
+++ b/xlators/features/filter/src/filter.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/performance/stat-prefetch/Makefile.am b/xlators/features/gfid-access/Makefile.am
index af437a64d..af437a64d 100644
--- a/xlators/performance/stat-prefetch/Makefile.am
+++ b/xlators/features/gfid-access/Makefile.am
diff --git a/xlators/features/gfid-access/src/Makefile.am b/xlators/features/gfid-access/src/Makefile.am
new file mode 100644
index 000000000..db53affaa
--- /dev/null
+++ b/xlators/features/gfid-access/src/Makefile.am
@@ -0,0 +1,15 @@
+# Build rules for the gfid-access translator module.
+xlator_LTLIBRARIES = gfid-access.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+gfid_access_la_LDFLAGS = -module -avoid-version
+
+gfid_access_la_SOURCES = gfid-access.c
+gfid_access_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+# headers that are part of the sources but are not installed
+noinst_HEADERS = gfid-access.h gfid-access-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h
new file mode 100644
index 000000000..168d67b43
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GFID_ACCESS_MEM_TYPES_H
+#define _GFID_ACCESS_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/* Memory accounting types of the gfid-access translator, numbered
+ * consecutively after the common types.
+ * NOTE(review): the enum tag says "changelog" although every member is a
+ * gfid-access type -- looks like a copy/paste leftover; confirm before
+ * renaming. */
+enum gf_changelog_mem_types {
+        gf_gfid_access_mt_priv_t = gf_common_mt_end + 1,
+        gf_gfid_access_mt_gfid_t = gf_common_mt_end + 2,
+        gf_gfid_access_mt_end    = gf_common_mt_end + 3
+};
+
+#endif
+
diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c
new file mode 100644
index 000000000..da0ba7e50
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.c
@@ -0,0 +1,1172 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "gfid-access.h"
+#include "inode.h"
+#include "byte-order.h"
+
+
+
+/*
+ * Release a ga_newfile_args_t: the basename, the symlink target (only when
+ * st_mode describes a symlink) and finally the structure itself, which is
+ * handed back with mem_put(). Passing NULL is a no-op.
+ */
+void
+ga_newfile_args_free (ga_newfile_args_t *args)
+{
+        if (args == NULL)
+                return;
+
+        GF_FREE (args->bname);
+
+        /* linkpath is a union member, it is only valid for symlinks */
+        if (S_ISLNK (args->st_mode)) {
+                if (args->args.symlink.linkpath != NULL) {
+                        GF_FREE (args->args.symlink.linkpath);
+                        args->args.symlink.linkpath = NULL;
+                }
+        }
+
+        mem_put (args);
+}
+
+
+/*
+ * Release a ga_heal_args_t: frees the basename and hands the structure
+ * back with mem_put(). Passing NULL is a no-op.
+ */
+void
+ga_heal_args_free (ga_heal_args_t *args)
+{
+        if (args != NULL) {
+                GF_FREE (args->bname);
+                mem_put (args);
+        }
+}
+
+
+/*
+ * Read one network-order 32-bit word from the blob and advance the cursor.
+ * memcpy() is used because the fields are not naturally aligned (st_mode,
+ * for instance, follows the 37-byte gfid). Returns 0 on success, -1 when
+ * fewer than four bytes are left.
+ */
+static int
+ga_blob_read_u32 (char **blob, int *blob_len, uint32_t *out)
+{
+        uint32_t word = 0;
+
+        if (*blob_len < (int) sizeof (word))
+                return -1;
+
+        memcpy (&word, *blob, sizeof (word));
+        *out       = ntoh32 (word);
+        *blob     += sizeof (word);
+        *blob_len -= (int) sizeof (word);
+
+        return 0;
+}
+
+/*
+ * Decode the value of a GF_FUSE_AUX_GFID_NEWFILE setxattr request.
+ *
+ * Layout of data->data (32-bit fields are in network byte order):
+ *   uid (4) | gid (4) | gfid (sizeof args->gfid bytes) | st_mode (4) |
+ *   bname (NUL terminated) | type specific tail
+ * where the tail is
+ *   directory: mode (4) | umask (4)
+ *   symlink:   linkpath (NUL terminated)
+ *   otherwise: mode (4) | rdev (4) | umask (4)
+ * Nothing may follow the tail.
+ *
+ * Returns a structure taken from priv->newfile_args_pool which the caller
+ * releases with ga_newfile_args_free(), or NULL when the blob is malformed
+ * or an allocation fails.
+ */
+ga_newfile_args_t *
+ga_newfile_parse_args (xlator_t *this, data_t *data)
+{
+        ga_newfile_args_t *args     = NULL;
+        ga_private_t      *priv     = NULL;
+        int                len      = 0;
+        int                blob_len = 0;
+        int                min_len  = 0;
+        char              *blob     = NULL;
+        uint32_t           word     = 0;
+
+        priv = this->private;
+
+        blob     = data->data;
+        blob_len = data->len;
+
+        min_len = sizeof (args->uid) + sizeof (args->gid) + sizeof (args->gfid)
+                  + sizeof (args->st_mode) + 2 + 2;
+        if (blob_len < min_len) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Invalid length: Total length is less "
+                        "than minimum length.");
+                goto err;
+        }
+
+        args = mem_get0 (priv->newfile_args_pool);
+        if (args == NULL)
+                goto err;
+
+        /* the fixed header is covered by the min_len check above */
+        if (ga_blob_read_u32 (&blob, &blob_len, &word))
+                goto err_len;
+        args->uid = word;
+
+        if (ga_blob_read_u32 (&blob, &blob_len, &word))
+                goto err_len;
+        args->gid = word;
+
+        memcpy (args->gfid, blob, sizeof (args->gfid));
+        /* gfid is printed with %s below: never trust the sender's NUL */
+        args->gfid[sizeof (args->gfid) - 1] = '\0';
+        blob     += sizeof (args->gfid);
+        blob_len -= (int) sizeof (args->gfid);
+
+        if (ga_blob_read_u32 (&blob, &blob_len, &word))
+                goto err_len;
+        args->st_mode = word;
+
+        len = strnlen (blob, blob_len);
+        if (len == blob_len) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "gfid: %s. No null byte present.",
+                        args->gfid);
+                goto err;
+        }
+
+        args->bname = GF_CALLOC (1, (len + 1), gf_common_mt_char);
+        if (args->bname == NULL)
+                goto err;
+
+        memcpy (args->bname, blob, (len + 1));
+        blob     += (len + 1);
+        blob_len -= (len + 1);
+
+        if (S_ISDIR (args->st_mode)) {
+                if (ga_blob_read_u32 (&blob, &blob_len, &word))
+                        goto err_len;
+                args->args.mkdir.mode = word;
+
+                if (ga_blob_read_u32 (&blob, &blob_len, &word))
+                        goto err_len;
+                args->args.mkdir.umask = word;
+        } else if (S_ISLNK (args->st_mode)) {
+                len = strnlen (blob, blob_len);
+                if (len == blob_len)
+                        goto err_len;
+
+                args->args.symlink.linkpath = GF_CALLOC (1, len + 1,
+                                                         gf_common_mt_char);
+                if (args->args.symlink.linkpath == NULL)
+                        goto err;
+
+                memcpy (args->args.symlink.linkpath, blob, (len + 1));
+                blob     += (len + 1);
+                blob_len -= (len + 1);
+        } else {
+                if (ga_blob_read_u32 (&blob, &blob_len, &word))
+                        goto err_len;
+                args->args.mknod.mode = word;
+
+                if (ga_blob_read_u32 (&blob, &blob_len, &word))
+                        goto err_len;
+                args->args.mknod.rdev = word;
+
+                if (ga_blob_read_u32 (&blob, &blob_len, &word))
+                        goto err_len;
+                args->args.mknod.umask = word;
+        }
+
+        /* trailing bytes mean the sender and we disagree on the layout */
+        if (blob_len)
+                goto err_len;
+
+        return args;
+
+err_len:
+        gf_log (this->name, GF_LOG_ERROR,
+                "gfid: %s. Invalid length",
+                args->gfid);
+err:
+        if (args)
+                ga_newfile_args_free (args);
+
+        return NULL;
+}
+
+/*
+ * Decode the value of a GF_FUSE_AUX_GFID_HEAL setxattr request.
+ *
+ * data->data holds sizeof (args->gfid) bytes of gfid followed by a NUL
+ * terminated basename of at least one character; nothing may follow.
+ *
+ * Returns a structure taken from priv->heal_args_pool which the caller
+ * releases with ga_heal_args_free(), or NULL on malformed input or
+ * allocation failure.
+ */
+ga_heal_args_t *
+ga_heal_parse_args (xlator_t *this, data_t *data)
+{
+        ga_private_t   *priv      = this->private;
+        ga_heal_args_t *args      = NULL;
+        void           *cursor    = data->data;
+        int             remaining = data->len;
+        int             name_len  = 0;
+
+        /* bname should at least contain a character */
+        if (remaining < (sizeof (args->gfid) + 2))
+                goto fail;
+
+        args = mem_get0 (priv->heal_args_pool);
+        if (args == NULL)
+                goto fail;
+
+        memcpy (args->gfid, cursor, sizeof (args->gfid));
+        cursor    += sizeof (args->gfid);
+        remaining -= sizeof (args->gfid);
+
+        /* the basename must be terminated inside the blob */
+        name_len = strnlen (cursor, remaining);
+        if (name_len == remaining)
+                goto fail;
+
+        /* zeroed buffer: copying name_len bytes leaves it terminated */
+        args->bname = GF_CALLOC (1, name_len + 1, gf_common_mt_char);
+        if (args->bname == NULL)
+                goto fail;
+
+        memcpy (args->bname, cursor, name_len);
+        remaining -= (name_len + 1);
+
+        if (remaining == 0)
+                return args;
+
+fail:
+        if (args)
+                ga_heal_args_free (args);
+
+        return NULL;
+}
+
+/*
+ * Build in new_loc the location of entry "bname" under the directory that
+ * loc->inode stands for, and ask for its gfid through the "gfid-req" key
+ * of xdata.
+ *
+ * If loc->inode carries a context for this translator, the inode stored
+ * there is used as the parent instead of loc->inode itself.
+ *
+ * new_loc is expected to be zero-initialised by the caller. On success it
+ * holds references on parent and inode plus an allocated path; on failure
+ * everything taken here is released again with loc_wipe().
+ *
+ * NOTE(review): gfid is the args->gfid buffer of the callers (canonical
+ * text form, UUID_CANONICAL_FORM_LEN + 1 bytes), yet only its first 16
+ * bytes are exported, and they are exported without a copy
+ * (dict_set_static_bin) although the callers free args right after
+ * winding. Confirm both against the consumer of "gfid-req".
+ *
+ * Returns 0 on success, a non-zero value on failure.
+ */
+static int32_t
+ga_fill_tmp_loc (loc_t *loc, xlator_t *this, char *gfid,
+                 char *bname, dict_t *xdata, loc_t *new_loc)
+{
+        int       ret    = -1;
+        uint64_t  value  = 0;
+        inode_t  *parent = NULL;
+
+        if (!loc || !loc->inode || !new_loc)
+                return -1;
+
+        parent = loc->inode;
+        ret = inode_ctx_get (loc->inode, this, &value);
+        if (!ret) {
+                parent = (void *)value;
+        }
+        if (!parent)
+                return -1;
+
+        ret = -1;
+
+        /* parent itself should be looked up */
+        uuid_copy (new_loc->pargfid, parent->gfid);
+        new_loc->parent = inode_ref (parent);
+
+        new_loc->inode = inode_grep (parent->table, parent, bname);
+        if (!new_loc->inode)
+                new_loc->inode = inode_new (parent->table);
+        if (!new_loc->inode)
+                goto fail;
+
+        /* basename() below must not be handed a NULL path */
+        loc_path (new_loc, bname);
+        if (!new_loc->path)
+                goto fail;
+        new_loc->name = basename (new_loc->path);
+
+        /* As GFID would not be set on the entry yet, lets not send entry
+           gfid in the request */
+
+        ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16);
+        if (ret < 0)
+                goto fail;
+
+        return 0;
+
+fail:
+        loc_wipe (new_loc);
+        return ret;
+}
+
+
+
+/*
+ * Tell whether gfid is the gfid of the virtual directory, i.e. all zero
+ * bytes except the last one, which is GF_AUX_GFID.
+ */
+static gf_boolean_t
+__is_gfid_access_dir (uuid_t gfid)
+{
+        uuid_t virt_gfid;
+
+        memset (virt_gfid, 0, 16);
+        virt_gfid[15] = GF_AUX_GFID;
+
+        return (uuid_compare (gfid, virt_gfid) == 0) ? _gf_true : _gf_false;
+}
+
+/*
+ * forget callback: drop the context this translator stored on the inode
+ * and release the inode reference that context was holding. Always
+ * returns 0.
+ */
+int32_t
+ga_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t ctx_val = 0;
+
+        /* nothing was stored on this inode: nothing to release */
+        if (inode_ctx_del (inode, this, &ctx_val) == 0)
+                inode_unref ((inode_t *)ctx_val);
+
+        return 0;
+}
+
+
+/*
+ * Completion of the lookup wound by ga_heal_entry(). frame is the copied
+ * frame; its local holds the original setxattr frame, which is answered
+ * with the result of the lookup.
+ */
+static int
+ga_heal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno,
+             inode_t *inode, struct iatt *stat, dict_t *dict,
+             struct iatt *postparent)
+{
+        call_frame_t *setxattr_frame = frame->local;
+
+        frame->local = NULL;
+
+        /* inode linking and the like are left to the next lookup */
+        STACK_DESTROY (frame->root);
+
+        STACK_UNWIND_STRICT (setxattr, setxattr_frame, op_ret, op_errno,
+                             dict);
+
+        return 0;
+}
+
+/*
+ * Completion of the mkdir/symlink/mknod wound by ga_new_entry(). frame is
+ * the copied frame; its local holds the original setxattr frame, which is
+ * answered with the result of the entry creation.
+ */
+static int
+ga_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 inode_t *inode, struct iatt *buf,
+                 struct iatt *preparent, struct iatt *postparent,
+                 dict_t *xdata)
+{
+        call_frame_t *setxattr_frame = frame->local;
+
+        frame->local = NULL;
+
+        /* inode linking and the like are left to the next lookup */
+        STACK_DESTROY (frame->root);
+
+        STACK_UNWIND_STRICT (setxattr, setxattr_frame, op_ret, op_errno,
+                             xdata);
+
+        return 0;
+}
+
+/*
+ * Handle a GF_FUSE_AUX_GFID_NEWFILE setxattr: decode the request in data
+ * and create the described entry (directory, symlink or node) under loc
+ * by winding mkdir/symlink/mknod on a copy of the frame that carries the
+ * requested uid and gid. ga_newentry_cbk() answers the original setxattr.
+ *
+ * Returns 0 when the operation was wound (the reply comes through the
+ * callback) and non-zero when nothing was wound; in that case the caller
+ * has to unwind the setxattr itself.
+ *
+ * NOTE(review): args, including the gfid buffer referenced from xdata, is
+ * released as soon as STACK_WIND returns; this presumably relies on the
+ * child consuming its arguments before returning -- confirm.
+ */
+int32_t
+ga_new_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+              dict_t *xdata)
+{
+        int                ret       = -1;
+        ga_newfile_args_t *args      = NULL;
+        loc_t              tmp_loc   = {0,};
+        call_frame_t      *new_frame = NULL;
+        mode_t             mode      = 0;
+        dict_t            *own_xdata = NULL;
+
+        args = ga_newfile_parse_args (this, data);
+        if (!args)
+                goto out;
+
+        if (!xdata) {
+                /* remember that the dict is ours so it can be released */
+                own_xdata = dict_new ();
+                if (!own_xdata)
+                        goto out;
+                xdata = own_xdata;
+        }
+
+        ret = ga_fill_tmp_loc (loc, this, args->gfid,
+                               args->bname, xdata, &tmp_loc);
+        if (ret)
+                goto out;
+
+        new_frame = copy_frame (frame);
+        if (!new_frame) {
+                /* ret is 0 here; report the failure so the caller unwinds */
+                ret = -1;
+                goto out;
+        }
+        new_frame->local = (void *)frame;
+
+        new_frame->root->uid = args->uid;
+        new_frame->root->gid = args->gid;
+
+        if (S_ISDIR (args->st_mode)) {
+                STACK_WIND (new_frame, ga_newentry_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+                            &tmp_loc, args->args.mkdir.mode,
+                            args->args.mkdir.umask, xdata);
+        } else if (S_ISLNK (args->st_mode)) {
+                STACK_WIND (new_frame, ga_newentry_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+                            args->args.symlink.linkpath,
+                            &tmp_loc, 0, xdata);
+        } else {
+                /* file type from st_mode, permission bits (including
+                   setuid/setgid/sticky, hence four 7s) from the request */
+                mode = (S_IFMT & args->st_mode) |
+                        (07777 & args->args.mknod.mode);
+
+                STACK_WIND (new_frame, ga_newentry_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+                            &tmp_loc, mode,
+                            args->args.mknod.rdev, args->args.mknod.umask,
+                            xdata);
+        }
+
+        ret = 0;
+out:
+        ga_newfile_args_free (args);
+
+        /* drop the references and the path taken by ga_fill_tmp_loc() */
+        loc_wipe (&tmp_loc);
+
+        if (own_xdata)
+                dict_unref (own_xdata);
+
+        return ret;
+}
+
+/*
+ * Handle a GF_FUSE_AUX_GFID_HEAL setxattr: decode the request in data and
+ * wind a lookup for the named entry under loc, carrying the gfid in the
+ * "gfid-req" key of xdata. ga_heal_cbk() answers the original setxattr.
+ *
+ * Returns 0 when the lookup was wound (the reply comes through the
+ * callback) and non-zero when nothing was wound; in that case the caller
+ * has to unwind the setxattr itself.
+ *
+ * NOTE(review): args, including the gfid buffer referenced from xdata, is
+ * released as soon as STACK_WIND returns; this presumably relies on the
+ * child consuming its arguments before returning -- confirm.
+ */
+int32_t
+ga_heal_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+               dict_t *xdata)
+{
+        int             ret       = -1;
+        ga_heal_args_t *args      = NULL;
+        loc_t           tmp_loc   = {0,};
+        call_frame_t   *new_frame = NULL;
+        dict_t         *own_xdata = NULL;
+
+        args = ga_heal_parse_args (this, data);
+        if (!args)
+                goto out;
+
+        if (!xdata) {
+                /* remember that the dict is ours so it can be released */
+                own_xdata = dict_new ();
+                if (!own_xdata)
+                        goto out;
+                xdata = own_xdata;
+        }
+
+        ret = ga_fill_tmp_loc (loc, this, args->gfid, args->bname,
+                               xdata, &tmp_loc);
+        if (ret)
+                goto out;
+
+        new_frame = copy_frame (frame);
+        if (!new_frame) {
+                /* ret is 0 here; report the failure so the caller unwinds */
+                ret = -1;
+                goto out;
+        }
+        new_frame->local = (void *)frame;
+
+        STACK_WIND (new_frame, ga_heal_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD(this)->fops->lookup,
+                    &tmp_loc, xdata);
+
+        ret = 0;
+out:
+        if (args)
+                ga_heal_args_free (args);
+
+        /* drop the references and the path taken by ga_fill_tmp_loc() */
+        loc_wipe (&tmp_loc);
+
+        if (own_xdata)
+                dict_unref (own_xdata);
+
+        return ret;
+}
+
+/*
+ * Callback of the plain setxattr wound by ga_setxattr(): passes the
+ * child's result up unchanged.
+ */
+int32_t
+ga_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * setxattr fop.
+ *
+ *  - a setxattr on the ".gfid" entry of the root is refused with EPERM;
+ *  - the GF_FUSE_AUX_GFID_NEWFILE key turns the call into an entry
+ *    creation (ga_new_entry);
+ *  - the GF_FUSE_AUX_GFID_HEAL key turns it into a lookup carrying a gfid
+ *    request (ga_heal_entry);
+ *  - anything else is wound to the child, after substituting the inode
+ *    stored in the context of a virtual inode, if there is one.
+ *
+ * When one of the two special operations fails to start, the call is
+ * unwound with the initial error value, ENOMEM.
+ */
+int32_t
+ga_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+             int32_t flags, dict_t *xdata)
+{
+        inode_t *held     = NULL;
+        data_t  *payload  = NULL;
+        int      err_code = ENOMEM;
+
+        if (loc->name && (strcmp (GF_GFID_DIR, loc->name) == 0)) {
+                if ((loc->parent && __is_root_gfid (loc->parent->gfid)) ||
+                    __is_root_gfid (loc->pargfid)) {
+                        err_code = EPERM;
+                        goto fail;
+                }
+        }
+
+        payload = dict_get (dict, GF_FUSE_AUX_GFID_NEWFILE);
+        if (payload) {
+                if (ga_new_entry (frame, this, loc, payload, xdata) != 0)
+                        goto fail;
+                return 0;
+        }
+
+        payload = dict_get (dict, GF_FUSE_AUX_GFID_HEAL);
+        if (payload) {
+                if (ga_heal_entry (frame, this, loc, payload, xdata) != 0)
+                        goto fail;
+                return 0;
+        }
+
+        /* a virtual inode is replaced by the inode stored in its context,
+           any other inode is used as it is */
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, held, forward);
+
+forward:
+        STACK_WIND (frame, ga_setxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+                    xdata);
+
+        if (held)
+                inode_unref (held);
+
+        return 0;
+
+fail:
+        STACK_UNWIND_STRICT (setxattr, frame, -1, err_code, xdata);
+
+        return 0;
+}
+
+
+/*
+ * Callback of the gfid based lookup wound by ga_lookup() for an entry of
+ * the virtual directory.
+ *
+ * Failures and non-directories are passed up unchanged. For a directory
+ * the caller gets a stand-in inode whose context (for this translator)
+ * points at the real inode, and buf->ia_gfid / buf->ia_ino are replaced:
+ * the stand-in keeps the gfid it already has, otherwise a freshly
+ * generated one is used.
+ *
+ * frame->local, when set by ga_lookup() (revalidate), is the stand-in
+ * inode to answer with; otherwise the inode of the reply is used.
+ *
+ * NOTE(review): the inode obtained from inode_new() below is not unref'd
+ * in this function after the unwind; check who owns that reference.
+ */
+int32_t
+ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, inode_t *inode,
+                       struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        int       j           = 0;
+        int       i           = 0;
+        int       ret         = 0;
+        uint64_t  temp_ino    = 0;
+        inode_t  *cbk_inode   = NULL;
+        inode_t  *true_inode  = NULL;
+        uuid_t    random_gfid = {0,};
+
+        if (frame->local)
+                cbk_inode = frame->local;
+        else
+                cbk_inode = inode;
+
+        frame->local = NULL;
+        if (op_ret)
+                goto unwind;
+
+        if (!IA_ISDIR (buf->ia_type))
+                goto unwind;
+
+        /* need to send back a different inode for linking in itable */
+        if (cbk_inode == inode) {
+                /* check if the inode is in the 'itable' or
+                   if its just previously discover()'d inode */
+                true_inode = inode_find (inode->table, buf->ia_gfid);
+                if (!true_inode) {
+                        cbk_inode = inode_new (inode->table);
+
+                        if (!cbk_inode) {
+                                op_ret = -1;
+                                op_errno = ENOMEM;
+                                goto unwind;
+                        }
+                        /* the inode is not present in itable, ie, the actual
+                           path is not yet looked up. Use the current inode
+                           itself for now */
+                        inode_ref (inode);
+                } else {
+                        /* 'inode_ref()' has been done in inode_find() */
+                        inode = true_inode;
+                }
+
+                /* the context keeps the reference taken above; ga_forget()
+                   releases it */
+                ret = inode_ctx_put (cbk_inode, this, (uint64_t)inode);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "failed to set the inode ctx with "
+                                "the actual inode");
+                        if (inode)
+                                inode_unref (inode);
+                }
+                inode = NULL;
+        }
+
+        if (!uuid_is_null (cbk_inode->gfid)) {
+                /* if the previous linked inode is used, use the
+                   same gfid */
+                uuid_copy (random_gfid, cbk_inode->gfid);
+        } else {
+                /* replace the buf->ia_gfid to a random gfid
+                   for directory, for files, what we received is fine */
+                uuid_generate (random_gfid);
+        }
+
+        uuid_copy (buf->ia_gfid, random_gfid);
+
+        /* ia_ino is built from the last 8 bytes of the gfid, byte 15 being
+           the least significant one */
+        for (i = 15; i > (15 - 8); i--) {
+                temp_ino += (uint64_t)(buf->ia_gfid[i]) << j;
+                j += 8;
+        }
+        buf->ia_ino = temp_ino;
+
+unwind:
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, cbk_inode, buf,
+                             xdata, postparent);
+
+        return 0;
+}
+
+/*
+ * Callback of the ordinary lookups wound by ga_lookup(). Every successful
+ * lookup of the root refreshes the cached root stat and the fake stat of
+ * the virtual directory derived from it; the reply itself is passed up
+ * unchanged.
+ */
+int32_t
+ga_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, inode_t *inode,
+               struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        ga_private_t *conf = NULL;
+
+        /* only a successful lookup of 'root' needs extra work */
+        if ((op_ret == 0) && __is_root_gfid (buf->ia_gfid)) {
+                conf = this->private;
+
+                /* copied on every reply so that the 'stat' info shown
+                   for the virtual directory stays current */
+                conf->root_stbuf = *buf;
+
+                /* the virtual directory is the root with its own gfid
+                   and inode number */
+                conf->gfiddir_stbuf = conf->root_stbuf;
+                conf->gfiddir_stbuf.ia_gfid[15] = GF_AUX_GFID;
+                conf->gfiddir_stbuf.ia_ino = GF_AUX_GFID;
+        }
+
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+                             xdata, postparent);
+        return 0;
+}
+
+/*
+ * lookup fop. The cases, in the order they are tested:
+ *
+ *  - no loc->name (discover), or revalidate of a non-directory: wound to
+ *    the child unchanged, answered through ga_lookup_cbk();
+ *  - ".gfid" directly under the root: answered here with the cached fake
+ *    stat (priv->gfiddir_stbuf) and the cached root stat as postparent;
+ *  - parent is not the virtual directory: wound to the child unchanged;
+ *  - entry of the virtual directory: the name must parse as a uuid
+ *    (otherwise ENOENT); the lookup is then wound by gfid only and
+ *    answered through ga_virtual_lookup_cbk().
+ *
+ * On revalidate of a virtual entry the inode stored in the context of
+ * loc->inode is used for the lookup and loc->inode is handed to the
+ * callback through frame->local. ESTALE is returned when the stored
+ * inode has no type yet but the gfid is meanwhile present in the inode
+ * table.
+ */
+int32_t
+ga_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ ga_private_t *priv = NULL;
+ int ret = -1;
+ uuid_t tmp_gfid = {0,};
+ loc_t tmp_loc = {0,};
+ uint64_t value = 0;
+ inode_t *inode = NULL;
+ inode_t *true_inode = NULL;
+ int32_t op_errno = ENOENT;
+
+ /* if its discover(), no need for any action here */
+ if (!loc->name)
+ goto wind;
+
+ /* if its revalidate, and inode is not of type directory,
+ proceed with 'wind' */
+ if (loc->inode && loc->inode->ia_type &&
+ !IA_ISDIR (loc->inode->ia_type))
+ goto wind;
+
+ /* priv is set before any path that can reach 'err' */
+ priv = this->private;
+
+ /* need to check if the lookup is on virtual dir */
+ if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) &&
+ ((loc->parent && __is_root_gfid (loc->parent->gfid)) ||
+ __is_root_gfid (loc->pargfid))) {
+ /* this means, the query is on '/.gfid', return the fake stat,
+ and say success */
+
+ STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+ }
+
+ /* now, check if the lookup() is on an existing entry,
+ but on gfid-path */
+ if (!((loc->parent && __is_gfid_access_dir (loc->parent->gfid)) ||
+ __is_gfid_access_dir (loc->pargfid)))
+ goto wind;
+
+ /* make sure the 'basename' is actually a 'canonical-gfid',
+ otherwise, return error */
+ ret = uuid_parse (loc->name, tmp_gfid);
+ if (ret)
+ goto err;
+
+ /* if its fresh lookup, go ahead and send it down, if not,
+ for directory, we need indirection to actual dir inode */
+ if (!(loc->inode && loc->inode->ia_type))
+ goto discover;
+
+ /* revalidate on directory */
+ ret = inode_ctx_get (loc->inode, this, &value);
+ if (ret)
+ goto err;
+
+ inode = (void *)value;
+
+ /* valid inode, already looked up, work on that */
+ if (inode->ia_type)
+ goto discover;
+
+ /* check if the inode is in the 'itable' or
+ if its just previously discover()'d inode */
+ true_inode = inode_find (loc->inode->table, tmp_gfid);
+ if (true_inode) {
+ /* time do another lookup and update the context
+ with proper inode */
+ op_errno = ESTALE;
+ goto err;
+ }
+
+discover:
+ /* for the virtual entries, we don't need to send 'gfid-req' key, as
+ for these entries, we don't want to 'set' a new gfid */
+ if (xdata)
+ dict_del (xdata, "gfid-req");
+
+ /* the lookup goes down by gfid only: tmp_loc gets no name or parent */
+ uuid_copy (tmp_loc.gfid, tmp_gfid);
+
+ /* if revalidate, then we need to have the proper reference */
+ if (inode) {
+ tmp_loc.inode = inode_ref (inode);
+ frame->local = loc->inode;
+ } else {
+ tmp_loc.inode = inode_ref (loc->inode);
+ }
+
+ STACK_WIND (frame, ga_virtual_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata);
+
+ /* release the reference taken for tmp_loc above */
+ inode_unref (tmp_loc.inode);
+
+ return 0;
+
+wind:
+ /* used for all the normal lookup path */
+ STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+}
+
+/*
+ * mkdir fop: refused for the virtual ".gfid" entry of the root (EEXIST)
+ * and for any entry under the virtual directory (EPERM); everything else
+ * is passed to the child.
+ */
+int
+ga_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          mode_t umask, dict_t *xdata)
+{
+        int err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (loc, err_code, reject);
+
+        STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+                    xdata);
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (mkdir, frame, -1, err_code, loc->inode,
+                             NULL, NULL, NULL, xdata);
+        return 0;
+}
+
+
+/*
+ * create fop: refused for the virtual ".gfid" entry of the root (EEXIST)
+ * and for any entry under the virtual directory (EPERM); everything else
+ * is passed to the child.
+ */
+int
+ga_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+           mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        int err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (loc, err_code, reject);
+
+        STACK_WIND (frame, default_create_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+                    loc, flags, mode, umask, fd, xdata);
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (create, frame, -1, err_code, NULL,
+                             NULL, NULL, NULL, NULL, xdata);
+        return 0;
+}
+
+/*
+ * symlink fop: refused for the virtual ".gfid" entry of the root (EEXIST)
+ * and for any entry under the virtual directory (EPERM); everything else
+ * is passed to the child.
+ */
+int
+ga_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
+            loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        int err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (loc, err_code, reject);
+
+        STACK_WIND (frame, default_symlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+                    linkname, loc, umask, xdata);
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (symlink, frame, -1, err_code, NULL,
+                             NULL, NULL, NULL, xdata);
+        return 0;
+}
+
+/*
+ * mknod fop: refused for the virtual ".gfid" entry of the root (EEXIST)
+ * and for any entry under the virtual directory (EPERM); everything else
+ * is passed to the child.
+ */
+int
+ga_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        int err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (loc, err_code, reject);
+
+        STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mknod, loc, mode, rdev,
+                    umask, xdata);
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (mknod, frame, -1, err_code, NULL,
+                             NULL, NULL, NULL, xdata);
+        return 0;
+}
+
+/*
+ * rmdir fop: refused for the virtual ".gfid" entry of the root (EEXIST)
+ * and for any entry under the virtual directory (EPERM). Otherwise a
+ * virtual inode in loc is replaced by the inode stored in its context and
+ * the call is passed to the child.
+ */
+int
+ga_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+          dict_t *xdata)
+{
+        inode_t *held     = NULL;
+        int      err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (loc, err_code, reject);
+
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, held, forward);
+
+forward:
+        STACK_WIND (frame, default_rmdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+                    loc, flag, xdata);
+
+        /* reference taken by the substitution above, if any */
+        if (held)
+                inode_unref (held);
+
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (rmdir, frame, -1, err_code, NULL,
+                             NULL, xdata);
+        return 0;
+}
+
+/*
+ * unlink fop: refused for the virtual ".gfid" entry of the root (EEXIST)
+ * and for any entry under the virtual directory (EPERM). Otherwise a
+ * virtual inode in loc is replaced by the inode stored in its context and
+ * the call is passed to the child.
+ */
+int
+ga_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+           dict_t *xdata)
+{
+        inode_t *held     = NULL;
+        int      err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (loc, err_code, reject);
+
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, held, forward);
+
+forward:
+        STACK_WIND (frame, default_unlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+                    loc, xflag, xdata);
+
+        /* reference taken by the substitution above, if any */
+        if (held)
+                inode_unref (held);
+
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (unlink, frame, -1, err_code, NULL,
+                             NULL, xdata);
+        return 0;
+}
+
+/*
+ * rename fop: refused when either location is the virtual ".gfid" entry
+ * of the root (EEXIST) or lies under the virtual directory (EPERM).
+ * Otherwise virtual inodes in both locations are replaced by the inodes
+ * stored in their contexts and the call is passed to the child.
+ */
+int
+ga_rename (call_frame_t *frame, xlator_t *this,
+           loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        inode_t *src_held = NULL;
+        inode_t *dst_held = NULL;
+        int      err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (oldloc, err_code, reject);
+        GFID_ACCESS_ENTRY_OP_CHECK (newloc, err_code, reject);
+
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, oldloc, src_held,
+                                         remap_dst);
+
+remap_dst:
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, newloc, dst_held, forward);
+
+forward:
+        STACK_WIND (frame, default_rename_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+                    oldloc, newloc, xdata);
+
+        /* references taken by the substitutions above, if any */
+        if (src_held)
+                inode_unref (src_held);
+        if (dst_held)
+                inode_unref (dst_held);
+
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (rename, frame, -1, err_code, NULL,
+                             NULL, NULL, NULL, NULL, xdata);
+        return 0;
+}
+
+
+/*
+ * link fop: refused when either location is the virtual ".gfid" entry of
+ * the root (EEXIST) or lies under the virtual directory (EPERM).
+ * Otherwise virtual inodes in both locations are replaced by the inodes
+ * stored in their contexts and the call is passed to the child.
+ */
+int
+ga_link (call_frame_t *frame, xlator_t *this,
+         loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        inode_t *src_held = NULL;
+        inode_t *dst_held = NULL;
+        int      err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (oldloc, err_code, reject);
+        GFID_ACCESS_ENTRY_OP_CHECK (newloc, err_code, reject);
+
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, oldloc, src_held,
+                                         remap_dst);
+
+remap_dst:
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, newloc, dst_held, forward);
+
+forward:
+        STACK_WIND (frame, default_link_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+                    oldloc, newloc, xdata);
+
+        /* references taken by the substitutions above, if any */
+        if (src_held)
+                inode_unref (src_held);
+        if (dst_held)
+                inode_unref (dst_held);
+
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (link, frame, -1, err_code, NULL,
+                             NULL, NULL, NULL, xdata);
+        return 0;
+}
+
+/*
+ * opendir fop: refused for the virtual ".gfid" entry of the root (EEXIST),
+ * for entries under the virtual directory (EPERM) and for inodes that
+ * carry a context of this translator, i.e. virtual inodes (ENOTSUP).
+ * Everything else is passed to the child.
+ */
+int32_t
+ga_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            fd_t *fd, dict_t *xdata)
+{
+        int err_code = 0;
+
+        GFID_ACCESS_ENTRY_OP_CHECK (loc, err_code, reject);
+
+        /* a virtual inode cannot be opened: readdirp and friends are
+           not handled on it */
+        if (inode_ctx_get (loc->inode, this, NULL) == 0) {
+                err_code = ENOTSUP;
+                goto reject;
+        }
+
+        STACK_WIND (frame, default_opendir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+                    loc, fd, xdata);
+        return 0;
+
+reject:
+        STACK_UNWIND_STRICT (opendir, frame, -1, err_code, NULL, xdata);
+        return 0;
+}
+
+/*
+ * getxattr fop: a virtual inode in loc is replaced by the inode stored in
+ * its context, then the call is passed to the child.
+ */
+int32_t
+ga_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             const char *name, dict_t *xdata)
+{
+        inode_t *held = NULL;
+
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, held, forward);
+
+forward:
+        STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+
+        /* reference taken by the substitution above, if any */
+        if (held)
+                inode_unref (held);
+
+        return 0;
+}
+
+/*
+ * stat fop: a virtual inode in loc is replaced by the inode stored in its
+ * context, then the call is passed to the child.
+ */
+int32_t
+ga_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+         dict_t *xdata)
+{
+        inode_t *held = NULL;
+
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, held, forward);
+
+forward:
+        STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->stat, loc, xdata);
+
+        /* reference taken by the substitution above, if any */
+        if (held)
+                inode_unref (held);
+
+        return 0;
+}
+
+/*
+ * setattr fop: a virtual inode in loc is replaced by the inode stored in
+ * its context, then the call is passed to the child.
+ */
+int32_t
+ga_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            struct iatt *stbuf, int32_t valid,
+            dict_t *xdata)
+{
+        inode_t *held = NULL;
+
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, held, forward);
+
+forward:
+        STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid,
+                    xdata);
+
+        /* reference taken by the substitution above, if any */
+        if (held)
+                inode_unref (held);
+
+        return 0;
+}
+
+/*
+ * removexattr fop: a virtual inode in loc is replaced by the inode stored
+ * in its context, then the call is passed to the child.
+ */
+int32_t
+ga_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+        inode_t *held = NULL;
+
+        GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, held, forward);
+
+forward:
+        STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr, loc, name,
+                    xdata);
+
+        /* reference taken by the substitution above, if any */
+        if (held)
+                inode_unref (held);
+
+        return 0;
+}
+
+
+/*
+ * Set up memory accounting for this translator with room for
+ * gf_gfid_access_mt_end + 1 types. Returns -1 when this is NULL,
+ * otherwise the result of xlator_mem_acct_init() (0 on success); a
+ * failure is logged.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int status = -1;
+
+        if (this) {
+                status = xlator_mem_acct_init (this,
+                                               gf_gfid_access_mt_end + 1);
+                if (status != 0)
+                        gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+                                " init failed");
+        }
+
+        return status;
+}
+
+/*
+ * Translator initialisation: requires exactly one child, then allocates
+ * the private structure and the two memory pools used for decoded
+ * "newfile" and "heal" requests. Returns 0 on success and -1 on failure,
+ * in which case nothing stays allocated and this->private is not set.
+ */
+int32_t
+init (xlator_t *this)
+{
+        ga_private_t *conf = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "not configured with exactly one child. exiting");
+                return -1;
+        }
+
+        /* This can be the top of graph in certain cases */
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "dangling volume. check volfile ");
+        }
+
+        conf = GF_CALLOC (1, sizeof (*conf), gf_gfid_access_mt_priv_t);
+        if (!conf)
+                return -1;
+
+        conf->newfile_args_pool = mem_pool_new (ga_newfile_args_t, 512);
+        if (!conf->newfile_args_pool)
+                goto cleanup;
+
+        conf->heal_args_pool = mem_pool_new (ga_heal_args_t, 512);
+        if (!conf->heal_args_pool)
+                goto cleanup;
+
+        this->private = conf;
+
+        return 0;
+
+cleanup:
+        /* the heal pool is the last allocation, so only the newfile pool
+           can exist at this point */
+        if (conf->newfile_args_pool)
+                mem_pool_destroy (conf->newfile_args_pool);
+        GF_FREE (conf);
+
+        return -1;
+}
+
+/*
+ * Translator teardown: detaches the private structure from the
+ * translator, destroys both argument pools and frees the structure.
+ */
+void
+fini (xlator_t *this)
+{
+        ga_private_t *conf = this->private;
+
+        this->private = NULL;
+
+        if (conf == NULL)
+                return;
+
+        if (conf->newfile_args_pool)
+                mem_pool_destroy (conf->newfile_args_pool);
+        if (conf->heal_args_pool)
+                mem_pool_destroy (conf->heal_args_pool);
+
+        GF_FREE (conf);
+}
+
+
+/*
+ * File operations implemented by this translator. Operations that are not
+ * listed here are not set in this table.
+ */
+struct xlator_fops fops = {
+ .lookup = ga_lookup,
+
+ /* entry fops */
+ .mkdir = ga_mkdir,
+ .mknod = ga_mknod,
+ .create = ga_create,
+ .symlink = ga_symlink,
+ .link = ga_link,
+ .unlink = ga_unlink,
+ .rmdir = ga_rmdir,
+ .rename = ga_rename,
+
+ /* handle any other directory operations here */
+ .opendir = ga_opendir,
+ .stat = ga_stat,
+ .setattr = ga_setattr,
+ .getxattr = ga_getxattr,
+ .removexattr = ga_removexattr,
+
+ /* special fop to handle more entry creations */
+ .setxattr = ga_setxattr,
+};
+
+/* Callbacks: forget releases the inode reference kept in the inode context. */
+struct xlator_cbks cbks = {
+ .forget = ga_forget,
+};
+
+/* Option table of the translator; it holds only the terminating entry. */
+struct volume_options options[] = {
+ /* This translator doesn't take any options, or provide any options */
+ { .key = {NULL} },
+};
diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h
new file mode 100644
index 000000000..e13c9b724
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.h
@@ -0,0 +1,128 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __GFID_ACCESS_H__
+#define __GFID_ACCESS_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "gfid-access-mem-types.h"
+
+/* Length of a gfid in canonical text form, without the terminating NUL. */
+#define UUID_CANONICAL_FORM_LEN 36
+
+/* setxattr keys that trigger entry creation / heal in ga_setxattr() */
+#define GF_FUSE_AUX_GFID_NEWFILE "glusterfs.gfid.newfile"
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+
+#define GF_GFID_KEY "GLUSTERFS_GFID"
+/* name of the virtual directory, looked for directly under the root */
+#define GF_GFID_DIR ".gfid"
+/* last gfid byte and inode number reported for the virtual directory */
+#define GF_AUX_GFID 0xd
+
+/*
+ * Replace a virtual inode in loc 'l' by the inode stored in its context
+ * for translator 'x'.
+ *
+ * If l->parent is set, only the parent is considered: without a context
+ * the macro jumps to 'lbl'; with one, l->parent is replaced and l->inode
+ * is left alone. If l->parent is not set, l->inode is treated the same
+ * way. Whenever a replacement happens, 'unref' receives a reference on
+ * the substituted inode which the caller must inode_unref() once it is
+ * done with 'l'.
+ *
+ * The macro jumps to 'lbl' (a label in the calling function) and must be
+ * used as a statement. Its locals carry a ga_..._ name so that they can
+ * neither capture nor shadow the caller's variables.
+ */
+#define GFID_ACCESS_GET_VALID_DIR_INODE(x,l,unref,lbl) do {             \
+                int       ga_ret_   = 0;                                \
+                uint64_t  ga_value_ = 0;                                \
+                inode_t  *ga_inode_ = NULL;                             \
+                                                                        \
+                /* if its an entry operation, on the virtual */         \
+                /* directory inode as parent, we need to handle */      \
+                /* it properly */                                       \
+                if ((l)->parent) {                                      \
+                        ga_ret_ = inode_ctx_get ((l)->parent, (x),      \
+                                                 &ga_value_);           \
+                        if (ga_ret_)                                    \
+                                goto lbl;                               \
+                        ga_inode_ = (inode_t *)ga_value_;               \
+                        (unref) = inode_ref (ga_inode_);                \
+                        (l)->parent = ga_inode_;                        \
+                        /* if parent is virtual, no need to handle */   \
+                        /* loc->inode */                                \
+                        break;                                          \
+                }                                                       \
+                                                                        \
+                /* if its an inode operation, on the virtual */         \
+                /* directory inode itself, we need to handle */         \
+                /* it properly */                                       \
+                if ((l)->inode) {                                       \
+                        ga_ret_ = inode_ctx_get ((l)->inode, (x),       \
+                                                 &ga_value_);           \
+                        if (ga_ret_)                                    \
+                                goto lbl;                               \
+                        ga_inode_ = (inode_t *)ga_value_;               \
+                        (unref) = inode_ref (ga_inode_);                \
+                        (l)->inode = ga_inode_;                         \
+                }                                                       \
+                                                                        \
+        } while (0)
+
+/*
+ * Refuse entry operations that touch the virtual namespace: sets 'err'
+ * to EEXIST when 'loc' names the ".gfid" entry of the root, or to EPERM
+ * when the parent of 'loc' is the virtual directory, and jumps to 'lbl'
+ * (a label in the calling function). Falls through otherwise.
+ */
+#define GFID_ACCESS_ENTRY_OP_CHECK(loc,err,lbl)    do {                 \
+                /* need to check if the lookup is on virtual dir */     \
+                if (((loc)->name &&                                     \
+                     !strcmp (GF_GFID_DIR, (loc)->name)) &&             \
+                    (((loc)->parent &&                                  \
+                      __is_root_gfid ((loc)->parent->gfid)) ||          \
+                     __is_root_gfid ((loc)->pargfid))) {                \
+                        (err) = EEXIST;                                 \
+                        goto lbl;                                       \
+                }                                                       \
+                                                                        \
+                /* now, check if the lookup() is on an existing */      \
+                /* entry, but on gfid-path */                           \
+                if (((loc)->parent &&                                   \
+                     __is_gfid_access_dir ((loc)->parent->gfid)) ||     \
+                    __is_gfid_access_dir ((loc)->pargfid)) {            \
+                        (err) = EPERM;                                  \
+                        goto lbl;                                       \
+                }                                                       \
+        } while (0)
+
+
+/*
+ * Decoded form of a GF_FUSE_AUX_GFID_NEWFILE request, filled in by
+ * ga_newfile_parse_args(). st_mode selects the valid member of 'args':
+ * mkdir for directories, symlink for symlinks, mknod for everything else.
+ */
+typedef struct {
+ unsigned int uid; /* owner given to the new entry's frame */
+ unsigned int gid;
+ char gfid[UUID_CANONICAL_FORM_LEN + 1]; /* copied verbatim from the request */
+ unsigned int st_mode; /* file type of the entry to create */
+ char *bname; /* allocated, NUL terminated basename */
+
+ union {
+ struct _symlink_in {
+ char *linkpath; /* allocated, NUL terminated target */
+ } __attribute__ ((__packed__)) symlink;
+
+ struct _mknod_in {
+ unsigned int mode;
+ unsigned int rdev;
+ unsigned int umask;
+ } __attribute__ ((__packed__)) mknod;
+
+ struct _mkdir_in {
+ unsigned int mode;
+ unsigned int umask;
+ } __attribute__ ((__packed__)) mkdir;
+ } __attribute__ ((__packed__)) args;
+} __attribute__((__packed__)) ga_newfile_args_t;
+
+/*
+ * Decoded form of a GF_FUSE_AUX_GFID_HEAL request, filled in by
+ * ga_heal_parse_args().
+ */
+typedef struct {
+ char gfid[UUID_CANONICAL_FORM_LEN + 1]; /* copied verbatim from the request */
+ char *bname; /* a null terminated basename */
+} __attribute__((__packed__)) ga_heal_args_t;
+
+/* Per-translator state, allocated in init() and released in fini(). */
+struct ga_private {
+ /* root inode's stbuf */
+ struct iatt root_stbuf;
+ /* fake stat of the virtual directory, derived from root_stbuf */
+ struct iatt gfiddir_stbuf;
+ /* pools for decoded "newfile" and "heal" requests */
+ struct mem_pool *newfile_args_pool;
+ struct mem_pool *heal_args_pool;
+};
+typedef struct ga_private ga_private_t;
+
+#endif /* __GFID_ACCESS_H__ */
diff --git a/xlators/storage/bdb/Makefile.am b/xlators/features/glupy/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/xlators/storage/bdb/Makefile.am
+++ b/xlators/features/glupy/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/glupy/doc/README.md b/xlators/features/glupy/doc/README.md
new file mode 100644
index 000000000..2d7b30ef6
--- /dev/null
+++ b/xlators/features/glupy/doc/README.md
@@ -0,0 +1,44 @@
+This is just the very start for a GlusterFS[1] meta-translator that will
+allow translator code to be written in Python. It's based on the standard
+Python embedding (not extending) techniques, plus a dash of the ctypes module.
+The interface is a pretty minimal adaptation of the dispatches and callbacks
+from the C API[2] to Python, as follows:
+
+* Dispatch functions and callbacks must be defined on an "xlator" class
+ derived from gluster.Translator so that they'll be auto-registered with
+ the C translator during initialization.
+
+* For each dispatch or callback function you want to intercept, you define a
+ Python function using the xxx\_fop\_t or xxx\_cbk\_t decorator.
+
+* The arguments for each operation are different, so you'll need to refer to
+ the C API. GlusterFS-specific types are used (though only loc\_t is fully
+ defined so far) and type correctness is enforced by ctypes.
+
+* If you do intercept a dispatch function, it is your responsibility to call
+ xxx\_wind (like STACK\_WIND in the C API but operation-specific) to pass
+ the request to the next translator. If you do not intercept a function, it
+ will default the same way as for C (pass through to the same operation with
+ the same arguments on the first child translator).
+
+* If you intercept a callback function, it is your responsibility to call
+ xxx\_unwind (like STACK\_UNWIND\_STRICT in the C API) to pass the request back
+ to the caller.
+
+So far only the lookup and create operations are handled this way, to support
+the "negative lookup" example. Now that the basic infrastructure is in place,
+adding more functions should be very quick, though with that much boilerplate I
+might pause to write a code generator. I also plan to add structure
+definitions and interfaces for some of the utility functions in libglusterfs
+(especially those having to do with inode and fd context) in the fairly near
+future. Note that you can also use ctypes to get at anything not explicitly
+exposed to Python already.
+
+_If you're coming here because of the Linux Journal article, please note that
+the code has evolved since that was written. The version that matches the
+article is here:_
+
+https://github.com/jdarcy/glupy/tree/4bbae91ba459ea46ef32f2966562492e4ca9187a
+
+[1] http://www.gluster.org
+[2] http://hekafs.org/dist/xlator_api_2.html
diff --git a/xlators/features/glupy/doc/TESTING b/xlators/features/glupy/doc/TESTING
new file mode 100644
index 000000000..e05f17f49
--- /dev/null
+++ b/xlators/features/glupy/doc/TESTING
@@ -0,0 +1,9 @@
+Loading a translator written in Python using the glupy meta translator
+-------------------------------------------------------------------------------
+'test.vol' is a simple volfile with the debug-trace Python translator on top
+of a brick. The volfile can be mounted using the following command.
+
+$ glusterfs --debug -f test.vol /path/to/mntpt
+
+If file operations are then performed on the newly mounted file system, the
+Python translator prints log output to the standard output.
diff --git a/xlators/features/glupy/doc/test.vol b/xlators/features/glupy/doc/test.vol
new file mode 100644
index 000000000..0751a488c
--- /dev/null
+++ b/xlators/features/glupy/doc/test.vol
@@ -0,0 +1,10 @@
+volume vol-posix
+ type storage/posix
+ option directory /path/to/brick
+end-volume
+
+volume vol-glupy
+ type features/glupy
+ option module-name debug-trace
+ subvolumes vol-posix
+end-volume
diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am
new file mode 100644
index 000000000..960862839
--- /dev/null
+++ b/xlators/features/glupy/src/Makefile.am
@@ -0,0 +1,20 @@
+xlator_LTLIBRARIES = glupy.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+glupydir = $(xlatordir)/glupy
+
+glupy_PYTHON = gluster.py negative.py helloworld.py debug-trace.py
+
+glupy_la_LDFLAGS = -module -avoid-version -shared -nostartfiles
+glupy_la_SOURCES = glupy.c
+glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ -lpthread -l$(BUILD_PYTHON_LIB)
+
+noinst_HEADERS = glupy.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -isystem $(BUILD_PYTHON_INC)
+
+AM_CFLAGS = -Wall -fno-strict-aliasing -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/glupy/src/debug-trace.py b/xlators/features/glupy/src/debug-trace.py
new file mode 100644
index 000000000..53e76546b
--- /dev/null
+++ b/xlators/features/glupy/src/debug-trace.py
@@ -0,0 +1,774 @@
+import sys
+import stat
+from uuid import UUID
+from time import strftime, localtime
+from gluster import *
+# This translator was written primarily to test the fop entry point definitions
+# and structure definitions in 'gluster.py'.
+# It is similar to the debug-trace translator, one of the already available
+# translator types written in C, that logs the arguments passed to the fops and
+# their corresponding cbk functions.
+
+dl.get_id.restype = c_long
+dl.get_id.argtypes = [ POINTER(call_frame_t) ]
+
+dl.get_rootunique.restype = c_uint64
+dl.get_rootunique.argtypes = [ POINTER(call_frame_t) ]
+
+def uuid2str (gfid):
+ return str(UUID(''.join(map("{0:02x}".format, gfid))))
+
+
+def st_mode_from_ia (prot, filetype):
+ st_mode = 0
+ type_bit = 0
+ prot_bit = 0
+
+ if filetype == IA_IFREG:
+ type_bit = stat.S_IFREG
+ elif filetype == IA_IFDIR:
+ type_bit = stat.S_IFDIR
+ elif filetype == IA_IFLNK:
+ type_bit = stat.S_IFLNK
+ elif filetype == IA_IFBLK:
+ type_bit = stat.S_IFBLK
+ elif filetype == IA_IFCHR:
+ type_bit = stat.S_IFCHR
+ elif filetype == IA_IFIFO:
+ type_bit = stat.S_IFIFO
+ elif filetype == IA_IFSOCK:
+ type_bit = stat.S_IFSOCK
+ elif filetype == IA_INVAL:
+ pass
+
+
+ if prot.suid:
+ prot_bit |= stat.S_ISUID
+ if prot.sgid:
+ prot_bit |= stat.S_ISGID
+ if prot.sticky:
+ prot_bit |= stat.S_ISVTX
+
+ if prot.owner.read:
+ prot_bit |= stat.S_IRUSR
+ if prot.owner.write:
+ prot_bit |= stat.S_IWUSR
+ if prot.owner.execn:
+ prot_bit |= stat.S_IXUSR
+
+ if prot.group.read:
+ prot_bit |= stat.S_IRGRP
+ if prot.group.write:
+ prot_bit |= stat.S_IWGRP
+ if prot.group.execn:
+ prot_bit |= stat.S_IXGRP
+
+ if prot.other.read:
+ prot_bit |= stat.S_IROTH
+ if prot.other.write:
+ prot_bit |= stat.S_IWOTH
+ if prot.other.execn:
+ prot_bit |= stat.S_IXOTH
+
+ st_mode = (type_bit | prot_bit)
+
+ return st_mode
+
+
+def trace_stat2str (buf):
+ gfid = uuid2str(buf.contents.ia_gfid)
+ mode = st_mode_from_ia(buf.contents.ia_prot, buf.contents.ia_type)
+ atime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_atime))
+ mtime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_mtime))
+ ctime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_ctime))
+ return ("(gfid={0:s}, ino={1:d}, mode={2:o}, nlink={3:d}, uid ={4:d}, "+
+ "gid ={5:d}, size={6:d}, blocks={7:d}, atime={8:s}, mtime={9:s}, "+
+ "ctime={10:s})").format(gfid, buf.contents.ia_no, mode,
+ buf.contents.ia_nlink,
+ buf.contents.ia_uid,
+ buf.contents.ia_gid,
+ buf.contents.ia_size,
+ buf.contents.ia_blocks,
+ atime_buf, mtime_buf,
+ ctime_buf)
+
+class xlator(Translator):
+
+ def __init__(self, c_this):
+ Translator.__init__(self, c_this)
+ self.gfids = {}
+
+ def lookup_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.gfid)
+ print("GLUPY TRACE LOOKUP FOP- {0:d}: gfid={1:s}; " +
+ "path={2:s}").format(unique, gfid, loc.contents.path)
+ self.gfids[key] = gfid
+ dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def lookup_cbk(self, frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent):
+ unique =dl.get_rootunique(frame)
+ key =dl.get_id(frame)
+ if op_ret == 0:
+ gfid = uuid2str(buf.contents.ia_gfid)
+ statstr = trace_stat2str(buf)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE LOOKUP CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *buf={3:s}; " +
+ "*postparent={4:s}").format(unique, gfid,
+ op_ret, statstr,
+ postparentstr)
+ else:
+ gfid = self.gfids[key]
+ print("GLUPY TRACE LOOKUP CBK - {0:d}: gfid={1:s};" +
+ " op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_lookup(frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent)
+ return 0
+
+ def create_fop(self, frame, this, loc, flags, mode, umask, fd,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ gfid = uuid2str(loc.contents.gfid)
+ print("GLUPY TRACE CREATE FOP- {0:d}: gfid={1:s}; path={2:s}; " +
+ "fd={3:s}; flags=0{4:o}; mode=0{5:o}; " +
+ "umask=0{6:o}").format(unique, gfid, loc.contents.path,
+ fd, flags, mode, umask)
+ dl.wind_create(frame, POINTER(xlator_t)(), loc, flags,mode,
+ umask, fd, xdata)
+ return 0
+
+ def create_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret >= 0:
+ gfid = uuid2str(inode.contents.gfid)
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE CREATE CBK- {0:d}: gfid={1:s};" +
+ " op_ret={2:d}; fd={3:s}; *stbuf={4:s}; " +
+ "*preparent={5:s};" +
+ " *postparent={6:s}").format(unique, gfid, op_ret,
+ fd, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print ("GLUPY TRACE CREATE CBK- {0:d}: op_ret={1:d}; " +
+ "op_errno={2:d}").format(unique, op_ret, op_errno)
+ dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata)
+ return 0
+
+ def open_fop(self, frame, this, loc, flags, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE OPEN FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "flags={3:d}; fd={4:s}").format(unique, gfid,
+ loc.contents.path, flags,
+ fd)
+ self.gfids[key] = gfid
+ dl.wind_open(frame, POINTER(xlator_t)(), loc, flags, fd, xdata)
+ return 0
+
+ def open_cbk(self, frame, cookie, this, op_ret, op_errno, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE OPEN CBK- {0:d}: gfid={1:s}; op_ret={2:d}; "
+ "op_errno={3:d}; *fd={4:s}").format(unique, gfid,
+ op_ret, op_errno, fd)
+ del self.gfids[key]
+ dl.unwind_open(frame, cookie, this, op_ret, op_errno, fd,
+ xdata)
+ return 0
+
+ def readv_fop(self, frame, this, fd, size, offset, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READV FOP- {0:d}: gfid={1:s}; "+
+ "fd={2:s}; size ={3:d}; offset={4:d}; " +
+ "flags=0{5:x}").format(unique, gfid, fd, size, offset,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_readv (frame, POINTER(xlator_t)(), fd, size, offset,
+ flags, xdata)
+ return 0
+
+ def readv_cbk(self, frame, cookie, this, op_ret, op_errno, vector,
+ count, buf, iobref, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret >= 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
+ "op_ret={2:d}; *buf={3:s};").format(unique, gfid,
+ op_ret,
+ statstr)
+
+ else:
+ print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_readv (frame, cookie, this, op_ret, op_errno,
+ vector, count, buf, iobref, xdata)
+ return 0
+
+ def writev_fop(self, frame, this, fd, vector, count, offset, flags,
+ iobref, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE WRITEV FOP- {0:d}: gfid={1:s}; " +
+ "fd={2:s}; count={3:d}; offset={4:d}; " +
+ "flags=0{5:x}").format(unique, gfid, fd, count, offset,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_writev(frame, POINTER(xlator_t)(), fd, vector, count,
+ offset, flags, iobref, xdata)
+ return 0
+
+ def writev_cbk(self, frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ if op_ret >= 0:
+ preopstr = trace_stat2str(prebuf)
+ postopstr = trace_stat2str(postbuf)
+ print("GLUPY TRACE WRITEV CBK- {0:d}: op_ret={1:d}; " +
+ "*prebuf={2:s}; " +
+ "*postbuf={3:s}").format(unique, op_ret, preopstr,
+ postopstr)
+ else:
+ gfid = self.gfids[key]
+ print("GLUPY TRACE WRITEV CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_writev (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata)
+ return 0
+
+ def opendir_fop(self, frame, this, loc, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE OPENDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "fd={3:s}").format(unique, gfid, loc.contents.path, fd)
+ self.gfids[key] = gfid
+ dl.wind_opendir(frame, POINTER(xlator_t)(), loc, fd, xdata)
+ return 0
+
+ def opendir_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE OPENDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
+ " op_errno={3:d}; fd={4:s}").format(unique, gfid, op_ret,
+ op_errno, fd)
+ del self.gfids[key]
+ dl.unwind_opendir(frame, cookie, this, op_ret, op_errno,
+ fd, xdata)
+ return 0
+
+ def readdir_fop(self, frame, this, fd, size, offset, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READDIR FOP- {0:d}: gfid={1:s}; fd={2:s}; " +
+ "size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
+ offset)
+ self.gfids[key] = gfid
+ dl.wind_readdir(frame, POINTER(xlator_t)(), fd, size, offset,
+ xdata)
+ return 0
+
+ def readdir_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE READDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
+ " op_errno={3:d}").format(unique, gfid, op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_readdir(frame, cookie, this, op_ret, op_errno, buf,
+ xdata)
+ return 0
+
+ def readdirp_fop(self, frame, this, fd, size, offset, dictionary):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READDIRP FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ " size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
+ offset)
+ self.gfids[key] = gfid
+ dl.wind_readdirp(frame, POINTER(xlator_t)(), fd, size, offset,
+ dictionary)
+ return 0
+
+ def readdirp_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE READDIRP CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_readdirp(frame, cookie, this, op_ret, op_errno, buf,
+ xdata)
+ return 0
+
+ def mkdir_fop(self, frame, this, loc, mode, umask, xdata):
+ unique = dl.get_rootunique(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE MKDIR FOP- {0:d}: gfid={1:s}; path={2:s}; " +
+ "mode={3:d}; umask=0{4:o}").format(unique, gfid,
+ loc.contents.path, mode,
+ umask)
+ dl.wind_mkdir(frame, POINTER(xlator_t)(), loc, mode, umask,
+ xdata)
+ return 0
+
+ def mkdir_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret == 0:
+ gfid = uuid2str(inode.contents.gfid)
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE MKDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *stbuf={3:s}; *prebuf={4:s}; "+
+ "*postbuf={5:s} ").format(unique, gfid, op_ret,
+ statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE MKDIR CBK- {0:d}: op_ret={1:d}; "+
+ "op_errno={2:d}").format(unique, op_ret, op_errno)
+ dl.unwind_mkdir(frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata)
+ return 0
+
+ def rmdir_fop(self, frame, this, loc, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE RMDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "flags={3:d}").format(unique, gfid, loc.contents.path,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_rmdir(frame, POINTER(xlator_t)(), loc, flags, xdata)
+ return 0
+
+ def rmdir_cbk(self, frame, cookie, this, op_ret, op_errno, preparent,
+ postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *prebuf={3:s}; "+
+ "*postbuf={4:s}").format(unique, gfid, op_ret,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_rmdir(frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata)
+ return 0
+
+ def stat_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE STAT FOP- {0:d}: gfid={1:s}; " +
+ " path={2:s}").format(unique, gfid, loc.contents.path)
+ self.gfids[key] = gfid
+ dl.wind_stat(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def stat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *buf={3:s};").format(unique,
+ gfid,
+ op_ret,
+ statstr)
+ else:
+ print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_stat(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def fstat_fop(self, frame, this, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FSTAT FOP- {0:d}: gfid={1:s}; " +
+ "fd={2:s}").format(unique, gfid, fd)
+ self.gfids[key] = gfid
+ dl.wind_fstat(frame, POINTER(xlator_t)(), fd, xdata)
+ return 0
+
+ def fstat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):  # trace the fstat reply, then unwind it to the caller
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)  # same frame id that fstat_fop used as dict key
+ gfid = self.gfids[key]  # gfid stored under this frame id by fstat_fop
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; *buf={3:s}").format(unique,
+ gfid,
+ op_ret,
+ statstr)
+ else:
+ print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]  # entry is per-request; drop it once traced
+ dl.unwind_fstat(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def statfs_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ if loc.contents.inode:
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ else:
+ gfid = "0"
+ print("GLUPY TRACE STATFS FOP- {0:d}: gfid={1:s}; "+
+ "path={2:s}").format(unique, gfid, loc.contents.path)
+ dl.wind_statfs(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def statfs_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):  # trace the statfs reply, then unwind it to the caller
+ unique = dl.get_rootunique(frame)
+ if op_ret == 0:
+ #TBD: print buf (pointer to an iovec type object)
+ print("GLUPY TRACE STATFS CBK- {0:d}: "+
+ "op_ret={1:d}").format(unique, op_ret)
+ else:
+ print("GLUPY TRACE STATFS CBK- {0:d}: "+
+ "op_ret={1:d}; op_errno={2:d}").format(unique,
+ op_ret,
+ op_errno)
+ dl.unwind_statfs(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def getxattr_fop(self, frame, this, loc, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE GETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " name={3:s}").format(unique, gfid, loc.contents.path,
+ name)
+ self.gfids[key]=gfid
+ dl.wind_getxattr(frame, POINTER(xlator_t)(), loc, name, xdata)
+ return 0
+
+ def getxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE GETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}; "+
+ " dictionary={4:s}").format(unique, gfid, op_ret, op_errno,
+ dictionary)
+ del self.gfids[key]
+ dl.unwind_getxattr(frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata)
+ return 0
+
+ def fgetxattr_fop(self, frame, this, fd, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FGETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ "name={3:s}").format(unique, gfid, fd, name)
+ self.gfids[key] = gfid
+ dl.wind_fgetxattr(frame, POINTER(xlator_t)(), fd, name, xdata)
+ return 0
+
+ def fgetxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE FGETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d};"+
+ " dictionary={4:s}").format(unique, gfid, op_ret,
+ op_errno, dictionary)
+ del self.gfids[key]
+ dl.unwind_fgetxattr(frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata)
+ return 0
+
+ def setxattr_fop(self, frame, this, loc, dictionary, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE SETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " flags={3:d}").format(unique, gfid, loc.contents.path,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_setxattr(frame, POINTER(xlator_t)(), loc, dictionary,
+ flags, xdata)
+ return 0
+
+ def setxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE SETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_setxattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def fsetxattr_fop(self, frame, this, fd, dictionary, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)  # frame id, used as the key into self.gfids
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FSETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ "flags={3:d}").format(unique, gfid, fd, flags)
+ self.gfids[key] = gfid  # read back and deleted by fsetxattr_cbk
+ dl.wind_fsetxattr(frame, POINTER(xlator_t)(), fd, dictionary,
+ flags, xdata)
+ return 0
+
+ def fsetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE FSETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_fsetxattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def removexattr_fop(self, frame, this, loc, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE REMOVEXATTR FOP- {0:d}: gfid={1:s}; "+
+ "path={2:s}; name={3:s}").format(unique, gfid,
+ loc.contents.path,
+ name)
+ self.gfids[key] = gfid
+ dl.wind_removexattr(frame, POINTER(xlator_t)(), loc, name,
+ xdata)
+ return 0
+
+ def removexattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE REMOVEXATTR CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_removexattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def link_fop(self, frame, this, oldloc, newloc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ if (newloc.contents.inode):
+ newgfid = uuid2str(newloc.contents.inode.contents.gfid)
+ else:
+ newgfid = "0"
+ oldgfid = uuid2str(oldloc.contents.inode.contents.gfid)
+ print("GLUPY TRACE LINK FOP-{0:d}: oldgfid={1:s}; oldpath={2:s};"+
+ "newgfid={3:s};"+
+ "newpath={4:s}").format(unique, oldgfid,
+ oldloc.contents.path,
+ newgfid,
+ newloc.contents.path)
+ self.gfids[key] = oldgfid
+ dl.wind_link(frame, POINTER(xlator_t)(), oldloc, newloc,
+ xdata)
+ return 0
+
+ def link_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE LINK CBK- {0:d}: op_ret={1:d} "+
+ "*stbuf={2:s}; *prebuf={3:s}; "+
+ "*postbuf={4:s} ").format(unique, op_ret, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE LINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; "+
+ "op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_link(frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata)
+ return 0
+
+ def unlink_fop(self, frame, this, loc, xflag, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE UNLINK FOP- {0:d}; gfid={1:s}; path={2:s}; "+
+ "flag={3:d}").format(unique, gfid, loc.contents.path,
+ xflag)
+ self.gfids[key] = gfid
+ dl.wind_unlink(frame, POINTER(xlator_t)(), loc, xflag,
+ xdata)
+ return 0
+
+ def unlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE UNLINK CBK- {0:d}: gfid ={1:s}; "+
+ "op_ret={2:d}; *prebuf={3:s}; "+
+ "*postbuf={4:s} ").format(unique, gfid, op_ret,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE UNLINK CBK: {0:d}: gfid ={1:s}; "+
+ "op_ret={2:d}; "+
+ "op_errno={3:d}").format(unique, gfid, op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_unlink(frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata)
+ return 0
+
+ def readlink_fop(self, frame, this, loc, size, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE READLINK FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " size={3:d}").format(unique, gfid, loc.contents.path,
+ size)
+ self.gfids[key] = gfid
+ dl.wind_readlink(frame, POINTER(xlator_t)(), loc, size,
+ xdata)
+ return 0
+
+ def readlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ buf, stbuf, xdata):  # trace the readlink reply, then unwind it
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]  # gfid stored under this frame id by readlink_fop
+ if op_ret == 0:
+ statstr = trace_stat2str(stbuf)
+ print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}; buf={4:s}; "+
+ "*stbuf={5:s} ").format(unique, gfid,
+ op_ret, op_errno,
+ buf, statstr)
+ else:
+ print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]  # entry is per-request; drop it once traced
+ dl.unwind_readlink(frame, cookie, this, op_ret, op_errno, buf,
+ stbuf, xdata)
+ return 0
+
+ def symlink_fop(self, frame, this, linkpath, loc, umask, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE SYMLINK FOP- {0:d}: gfid={1:s}; "+
+ "linkpath={2:s}; path={3:s};"+
+ "umask=0{4:o}").format(unique, gfid, linkpath,
+ loc.contents.path, umask)
+ self.gfids[key] = gfid
+ dl.wind_symlink(frame, POINTER(xlator_t)(), linkpath, loc,
+ umask, xdata)
+ return 0
+
+ def symlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *stbuf={3:s}; *preparent={4:s}; "+
+ "*postparent={5:s}").format(unique, gfid,
+ op_ret, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_symlink(frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata)
+ return 0
diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c
new file mode 100644
index 000000000..dc86c0071
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.c
@@ -0,0 +1,2470 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+#include <Python.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "glupy.h"
+
+/* UTILITY FUNCTIONS FOR FOP-SPECIFIC CODE */
+
+/* NOTE(review): in the code visible here this key is referenced only from the
+ * compiled-out (#if 0) section of glupy_enter(); confirm other users before
+ * changing or removing it. */
+pthread_key_t gil_init_key;
+
+/*
+ * Make the calling thread ready to run Python code (PyGILState_Ensure) and
+ * return the state token that must later be passed to glupy_leave().
+ */
+PyGILState_STATE
+glupy_enter (void)
+{
+        PyGILState_STATE gil;
+
+        /* Compiled out: per-thread PyEval_ReleaseLock() guarded by
+         * gil_init_key. */
+#if 0
+        if (!pthread_getspecific(gil_init_key)) {
+                PyEval_ReleaseLock();
+                (void)pthread_setspecific(gil_init_key,(void *)1);
+        }
+#endif
+
+        gil = PyGILState_Ensure();
+        return gil;
+}
+
+/* Counterpart of glupy_enter(): release the GIL state token it returned. */
+void
+glupy_leave (PyGILState_STATE gstate)
+{
+        PyGILState_Release (gstate);
+}
+
+/* FOP: LOOKUP */
+
+/*
+ * Reply handler for LOOKUP.  With a callback registered in
+ * cbks[GLUPY_LOOKUP], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_LOOKUP]) {
+                gil = glupy_enter ();
+                rc = ((fop_lookup_cbk_t)(conf->cbks[GLUPY_LOOKUP])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        inode, buf, xdata, postparent);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+                             xdata, postparent);
+        return 0;
+}
+
+/*
+ * Entry point for LOOKUP.  With a handler registered in fops[GLUPY_LOOKUP],
+ * tag frame->local with the next value of this function's private counter,
+ * call the handler under the Python GIL and return its result; otherwise
+ * wind to the first child and return 0.
+ */
+int32_t
+glupy_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_LOOKUP]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_lookup_t)(conf->fops[GLUPY_LOOKUP])) (
+                        frame, this, loc, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_lookup_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->lookup, loc, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind LOOKUP with glupy_lookup_cbk as the reply handler.  A NULL xl, or xl
+ * equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_lookup (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_lookup_cbk, xl, xl->fops->lookup,
+                    loc, xdata);
+}
+
+/*
+ * Clear frame->local and unwind LOOKUP with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_lookup (call_frame_t *frame, long cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, inode_t *inode,
+               struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+                             inode, buf, xdata, postparent);
+}
+
+/* Record fop in the GLUPY_LOOKUP slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_lookup_fop (long py_this, fop_lookup_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_LOOKUP] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_LOOKUP slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_lookup_cbk (long py_this, fop_lookup_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_LOOKUP] = (long)cbk;
+}
+
+/* FOP: CREATE */
+
+/*
+ * Reply handler for CREATE.  With a callback registered in
+ * cbks[GLUPY_CREATE], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                  struct iatt *buf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_CREATE]) {
+                gil = glupy_enter ();
+                rc = ((fop_create_cbk_t)(conf->cbks[GLUPY_CREATE])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        fd, inode, buf, preparent, postparent, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for CREATE.  With a handler registered in fops[GLUPY_CREATE],
+ * tag frame->local with the next value of this function's private counter,
+ * call the handler under the Python GIL and return its result; otherwise
+ * wind to the first child and return 0.
+ */
+int32_t
+glupy_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+              mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_CREATE]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_create_t)(conf->fops[GLUPY_CREATE])) (
+                        frame, this, loc, flags, mode, umask, fd, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_create_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->create, loc, flags,
+                            mode, umask, fd, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind CREATE with glupy_create_cbk as the reply handler.  A NULL xl, or xl
+ * equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_create (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
+             mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_create_cbk, xl, xl->fops->create,
+                    loc, flags, mode, umask, fd, xdata);
+}
+
+/*
+ * Clear frame->local and unwind CREATE with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_create (call_frame_t *frame, long cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+               struct iatt *buf, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+                             preparent, postparent, xdata);
+}
+
+/* Record fop in the GLUPY_CREATE slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_create_fop (long py_this, fop_create_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_CREATE] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_CREATE slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_create_cbk (long py_this, fop_create_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_CREATE] = (long)cbk;
+}
+
+/* FOP: OPEN */
+
+/*
+ * Reply handler for OPEN.  With a callback registered in cbks[GLUPY_OPEN],
+ * call it under the Python GIL and return its result; otherwise clear
+ * frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_OPEN]) {
+                gil = glupy_enter ();
+                rc = ((fop_open_cbk_t)(conf->cbks[GLUPY_OPEN])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        fd, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for OPEN.  With a handler registered in fops[GLUPY_OPEN], tag
+ * frame->local with the next value of this function's private counter, call
+ * the handler under the Python GIL and return its result; otherwise wind to
+ * the first child and return 0.
+ */
+int32_t
+glupy_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            int32_t flags, fd_t *fd, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_OPEN]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_open_t)(conf->fops[GLUPY_OPEN])) (
+                        frame, this, loc, flags, fd, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_open_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->open, loc, flags, fd,
+                            xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind OPEN with glupy_open_cbk as the reply handler.  A NULL xl, or xl
+ * equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_open (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
+           fd_t *fd, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_open_cbk, xl, xl->fops->open,
+                    loc, flags, fd, xdata);
+}
+
+/*
+ * Clear frame->local and unwind OPEN with the supplied results.  cookie and
+ * this are not used.
+ */
+void
+unwind_open (call_frame_t *frame, long cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+}
+
+/* Record fop in the GLUPY_OPEN slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_open_fop (long py_this, fop_open_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_OPEN] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_OPEN slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_open_cbk (long py_this, fop_open_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_OPEN] = (long)cbk;
+}
+
+/* FOP: READV */
+
+/*
+ * Reply handler for READV.  With a callback registered in cbks[GLUPY_READV],
+ * call it under the Python GIL and return its result; otherwise clear
+ * frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iovec *vector,
+                 int32_t count, struct iatt *stbuf, struct iobref *iobref,
+                 dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_READV]) {
+                gil = glupy_enter ();
+                rc = ((fop_readv_cbk_t)(conf->cbks[GLUPY_READV])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        vector, count, stbuf, iobref, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+                             count, stbuf, iobref, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for READV.  With a handler registered in fops[GLUPY_READV],
+ * tag frame->local with the next value of this function's private counter,
+ * call the handler under the Python GIL and return its result; otherwise
+ * wind to the first child and return 0.
+ */
+int32_t
+glupy_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_READV]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_readv_t)(conf->fops[GLUPY_READV])) (
+                        frame, this, fd, size, offset, flags, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_readv_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readv, fd, size, offset,
+                            flags, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind READV with glupy_readv_cbk as the reply handler.  A NULL xl, or xl
+ * equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_readv (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+            off_t offset, uint32_t flags, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_readv_cbk, xl, xl->fops->readv,
+                    fd, size, offset, flags, xdata);
+}
+
+/*
+ * Clear frame->local and unwind READV with the supplied results.  cookie and
+ * this are not used.
+ */
+void
+unwind_readv (call_frame_t *frame, long cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, struct iovec *vector,
+              int32_t count, struct iatt *stbuf, struct iobref *iobref,
+              dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+                             count, stbuf, iobref, xdata);
+}
+
+/* Record fop in the GLUPY_READV slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_readv_fop (long py_this, fop_readv_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_READV] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_READV slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_readv_cbk (long py_this, fop_readv_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_READV] = (long)cbk;
+}
+
+/* FOP: WRITEV */
+
+/*
+ * Reply handler for WRITEV.  With a callback registered in
+ * cbks[GLUPY_WRITEV], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_WRITEV]) {
+                gil = glupy_enter ();
+                rc = ((fop_writev_cbk_t)(conf->cbks[GLUPY_WRITEV])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        prebuf, postbuf, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for WRITEV.  With a handler registered in fops[GLUPY_WRITEV],
+ * tag frame->local with the next value of this function's private counter,
+ * call the handler under the Python GIL and return its result; otherwise
+ * wind to the first child and return 0.
+ */
+int32_t
+glupy_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              struct iovec *vector, int32_t count, off_t offset,
+              uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_WRITEV]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_writev_t)(conf->fops[GLUPY_WRITEV])) (
+                        frame, this, fd, vector, count, offset, flags,
+                        iobref, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_writev_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->writev, fd, vector,
+                            count, offset, flags, iobref, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind WRITEV with glupy_writev_cbk as the reply handler.  A NULL xl, or xl
+ * equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_writev (call_frame_t *frame, xlator_t *xl, fd_t *fd, struct iovec *vector,
+             int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+             dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_writev_cbk, xl, xl->fops->writev,
+                    fd, vector, count, offset, flags, iobref, xdata);
+}
+
+/*
+ * Clear frame->local and unwind WRITEV with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_writev (call_frame_t *frame, long cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+               struct iatt *postbuf, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+}
+
+/* Record fop in the GLUPY_WRITEV slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_writev_fop (long py_this, fop_writev_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_WRITEV] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_WRITEV slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_writev_cbk (long py_this, fop_writev_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_WRITEV] = (long)cbk;
+}
+
+
+/* FOP: OPENDIR */
+
+/*
+ * Reply handler for OPENDIR.  With a callback registered in
+ * cbks[GLUPY_OPENDIR], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, fd_t *fd,
+                   dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_OPENDIR]) {
+                gil = glupy_enter ();
+                rc = ((fop_opendir_cbk_t)(conf->cbks[GLUPY_OPENDIR])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        fd, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for OPENDIR.  With a handler registered in
+ * fops[GLUPY_OPENDIR], tag frame->local with the next value of this
+ * function's private counter, call the handler under the Python GIL and
+ * return its result; otherwise wind to the first child and return 0.
+ */
+int32_t
+glupy_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+               fd_t *fd, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_OPENDIR]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_opendir_t)(conf->fops[GLUPY_OPENDIR])) (
+                        frame, this, loc, fd, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_opendir_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind OPENDIR with glupy_opendir_cbk as the reply handler.  A NULL xl, or
+ * xl equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_opendir (call_frame_t *frame, xlator_t *xl, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_opendir_cbk, xl, xl->fops->opendir,
+                    loc, fd, xdata);
+}
+
+/*
+ * Clear frame->local and unwind OPENDIR with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_opendir (call_frame_t *frame, long cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+}
+
+/* Record fop in the GLUPY_OPENDIR slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_opendir_fop (long py_this, fop_opendir_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_OPENDIR] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_OPENDIR slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_opendir_cbk (long py_this, fop_opendir_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_OPENDIR] = (long)cbk;
+}
+
+/* FOP: READDIR */
+
+/*
+ * Reply handler for READDIR.  With a callback registered in
+ * cbks[GLUPY_READDIR], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                   dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_READDIR]) {
+                gil = glupy_enter ();
+                rc = ((fop_readdir_cbk_t)(conf->cbks[GLUPY_READDIR])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        entries, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Entry point for READDIR.  With a handler registered in
+ * fops[GLUPY_READDIR], tag frame->local with the next value of this
+ * function's private counter, call the handler under the Python GIL and
+ * return its result; otherwise wind to the first child and return 0.
+ */
+int32_t
+glupy_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               size_t size, off_t offset, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_READDIR]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_readdir_t)(conf->fops[GLUPY_READDIR])) (
+                        frame, this, fd, size, offset, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_readdir_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdir, fd, size,
+                            offset, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind READDIR with glupy_readdir_cbk as the reply handler.  A NULL xl, or
+ * xl equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_readdir(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+             off_t offset, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_readdir_cbk, xl, xl->fops->readdir,
+                    fd, size, offset, xdata);
+}
+
+/*
+ * Clear frame->local and unwind READDIR with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_readdir (call_frame_t *frame, long cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno,
+                             entries, xdata);
+}
+
+/* Record fop in the GLUPY_READDIR slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_readdir_fop (long py_this, fop_readdir_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_READDIR] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_READDIR slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_readdir_cbk (long py_this, fop_readdir_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_READDIR] = (long)cbk;
+}
+
+
+/* FOP: READDIRP */
+
+/*
+ * Reply handler for READDIRP.  With a callback registered in
+ * cbks[GLUPY_READDIRP], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                    dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_READDIRP]) {
+                gil = glupy_enter ();
+                rc = ((fop_readdirp_cbk_t)(conf->cbks[GLUPY_READDIRP])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        entries, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Entry point for READDIRP.  With a handler registered in
+ * fops[GLUPY_READDIRP], tag frame->local with the next value of this
+ * function's private counter, call the handler under the Python GIL and
+ * return its result; otherwise wind to the first child and return 0.
+ */
+int32_t
+glupy_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                size_t size, off_t offset, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_READDIRP]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_readdirp_t)(conf->fops[GLUPY_READDIRP])) (
+                        frame, this, fd, size, offset, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_readdirp_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdirp, fd, size,
+                            offset, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind READDIRP with glupy_readdirp_cbk as the reply handler.  A NULL xl, or
+ * xl equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_readdirp (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+               off_t offset, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_readdirp_cbk, xl, xl->fops->readdirp,
+                    fd, size, offset, xdata);
+}
+
+/*
+ * Clear frame->local and unwind READDIRP with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_readdirp (call_frame_t *frame, long cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                 dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno,
+                             entries, xdata);
+}
+
+/* Record fop in the GLUPY_READDIRP slot of the fops table of the xlator
+ * whose address is py_this. */
+void
+set_readdirp_fop (long py_this, fop_readdirp_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_READDIRP] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_READDIRP slot of the cbks table of the xlator
+ * whose address is py_this. */
+void
+set_readdirp_cbk (long py_this, fop_readdirp_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_READDIRP] = (long)cbk;
+}
+
+
+/* FOP: STAT */
+
+/*
+ * Reply handler for STAT.  With a callback registered in cbks[GLUPY_STAT],
+ * call it under the Python GIL and return its result; otherwise clear
+ * frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_STAT]) {
+                gil = glupy_enter ();
+                rc = ((fop_stat_cbk_t)(conf->cbks[GLUPY_STAT])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        buf, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for STAT.  With a handler registered in fops[GLUPY_STAT], tag
+ * frame->local with the next value of this function's private counter, call
+ * the handler under the Python GIL and return its result; otherwise wind to
+ * the first child and return 0.
+ */
+int32_t
+glupy_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_STAT]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_stat_t)(conf->fops[GLUPY_STAT])) (
+                        frame, this, loc, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_stat_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->stat, loc, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind STAT with glupy_stat_cbk as the reply handler.  A NULL xl, or xl
+ * equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_stat (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_stat_cbk, xl, xl->fops->stat,
+                    loc, xdata);
+}
+
+/*
+ * Clear frame->local and unwind STAT with the supplied results.  cookie and
+ * this are not used.
+ */
+void
+unwind_stat (call_frame_t *frame, long cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, struct iatt *buf,
+             dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno,
+                             buf, xdata);
+}
+
+/* Record fop in the GLUPY_STAT slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_stat_fop (long py_this, fop_stat_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_STAT] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_STAT slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_stat_cbk (long py_this, fop_stat_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_STAT] = (long)cbk;
+}
+
+
+/* FOP: FSTAT */
+
+/*
+ * Reply handler for FSTAT.  With a callback registered in cbks[GLUPY_FSTAT],
+ * call it under the Python GIL and return its result; otherwise clear
+ * frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_FSTAT]) {
+                gil = glupy_enter ();
+                rc = ((fop_fstat_cbk_t)(conf->cbks[GLUPY_FSTAT])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        buf, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for FSTAT.  With a handler registered in fops[GLUPY_FSTAT],
+ * tag frame->local with the next value of this function's private counter,
+ * call the handler under the Python GIL and return its result; otherwise
+ * wind to the first child and return 0.
+ */
+int32_t
+glupy_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_FSTAT]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_fstat_t)(conf->fops[GLUPY_FSTAT])) (
+                        frame, this, fd, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_fstat_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fstat, fd, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind FSTAT with glupy_fstat_cbk as the reply handler.  A NULL xl, or xl
+ * equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_fstat (call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_fstat_cbk, xl, xl->fops->fstat,
+                    fd, xdata);
+}
+
+/*
+ * Clear frame->local and unwind FSTAT with the supplied results.  cookie and
+ * this are not used.
+ */
+void
+unwind_fstat (call_frame_t *frame, long cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, struct iatt *buf,
+              dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno,
+                             buf, xdata);
+}
+
+/* Record fop in the GLUPY_FSTAT slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_fstat_fop (long py_this, fop_fstat_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_FSTAT] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_FSTAT slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_fstat_cbk (long py_this, fop_fstat_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_FSTAT] = (long)cbk;
+}
+
+/* FOP: STATFS */
+
+/*
+ * Reply handler for STATFS.  With a callback registered in
+ * cbks[GLUPY_STATFS], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_STATFS]) {
+                gil = glupy_enter ();
+                rc = ((fop_statfs_cbk_t)(conf->cbks[GLUPY_STATFS])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        buf, xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for STATFS.  With a handler registered in fops[GLUPY_STATFS],
+ * tag frame->local with the next value of this function's private counter,
+ * call the handler under the Python GIL and return its result; otherwise
+ * wind to the first child and return 0.
+ */
+int32_t
+glupy_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_STATFS]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_statfs_t)(conf->fops[GLUPY_STATFS])) (
+                        frame, this, loc, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_statfs_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->statfs, loc, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind STATFS with glupy_statfs_cbk as the reply handler.  A NULL xl, or xl
+ * equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_statfs (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_statfs_cbk, xl, xl->fops->statfs,
+                    loc, xdata);
+}
+
+/*
+ * Clear frame->local and unwind STATFS with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_statfs (call_frame_t *frame, long cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+               dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno,
+                             buf, xdata);
+}
+
+/* Record fop in the GLUPY_STATFS slot of the fops table of the xlator whose
+ * address is py_this. */
+void
+set_statfs_fop (long py_this, fop_statfs_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_STATFS] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_STATFS slot of the cbks table of the xlator whose
+ * address is py_this. */
+void
+set_statfs_cbk (long py_this, fop_statfs_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_STATFS] = (long)cbk;
+}
+
+
+/* FOP: SETXATTR */
+
+/*
+ * Reply handler for SETXATTR.  With a callback registered in
+ * cbks[GLUPY_SETXATTR], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_SETXATTR]) {
+                gil = glupy_enter ();
+                rc = ((fop_setxattr_cbk_t)(conf->cbks[GLUPY_SETXATTR])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for SETXATTR.  With a handler registered in
+ * fops[GLUPY_SETXATTR], tag frame->local with the next value of this
+ * function's private counter, call the handler under the Python GIL and
+ * return its result; otherwise wind to the first child and return 0.
+ */
+int32_t
+glupy_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                dict_t *dict, int32_t flags, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_SETXATTR]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_setxattr_t)(conf->fops[GLUPY_SETXATTR])) (
+                        frame, this, loc, dict, flags, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_setxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->setxattr, loc, dict,
+                            flags, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind SETXATTR with glupy_setxattr_cbk as the reply handler.  A NULL xl, or
+ * xl equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_setxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+               dict_t *dict, int32_t flags, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_setxattr_cbk, xl, xl->fops->setxattr,
+                    loc, dict, flags, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind SETXATTR with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_setxattr (call_frame_t *frame, long cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+}
+
+/* Record fop in the GLUPY_SETXATTR slot of the fops table of the xlator
+ * whose address is py_this. */
+void
+set_setxattr_fop (long py_this, fop_setxattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_SETXATTR] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_SETXATTR slot of the cbks table of the xlator
+ * whose address is py_this. */
+void
+set_setxattr_cbk (long py_this, fop_setxattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_SETXATTR] = (long)cbk;
+}
+
+/* FOP: GETXATTR */
+
+/*
+ * Reply handler for GETXATTR.  With a callback registered in
+ * cbks[GLUPY_GETXATTR], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *dict,
+                    dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_GETXATTR]) {
+                gil = glupy_enter ();
+                rc = ((fop_getxattr_cbk_t)(conf->cbks[GLUPY_GETXATTR])) (
+                        frame, cookie, this, op_ret, op_errno, dict,
+                        xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Entry point for GETXATTR.  With a handler registered in
+ * fops[GLUPY_GETXATTR], tag frame->local with the next value of this
+ * function's private counter, call the handler under the Python GIL and
+ * return its result; otherwise wind to the first child and return 0.
+ */
+int32_t
+glupy_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_GETXATTR]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_getxattr_t)(conf->fops[GLUPY_GETXATTR])) (
+                        frame, this, loc, name, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_getxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getxattr, loc, name,
+                            xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind GETXATTR with glupy_getxattr_cbk as the reply handler.  A NULL xl, or
+ * xl equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_getxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+               const char *name, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_getxattr_cbk, xl, xl->fops->getxattr,
+                    loc, name, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind GETXATTR with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_getxattr (call_frame_t *frame, long cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *dict,
+                 dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict,
+                             xdata);
+}
+
+
+/* Record fop in the GLUPY_GETXATTR slot of the fops table of the xlator
+ * whose address is py_this. */
+void
+set_getxattr_fop (long py_this, fop_getxattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_GETXATTR] = (long)fop;
+}
+
+
+/* Record cbk in the GLUPY_GETXATTR slot of the cbks table of the xlator
+ * whose address is py_this. */
+void
+set_getxattr_cbk (long py_this, fop_getxattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_GETXATTR] = (long)cbk;
+}
+
+/* FOP: FSETXATTR */
+
+/*
+ * Reply handler for FSETXATTR.  With a callback registered in
+ * cbks[GLUPY_FSETXATTR], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_FSETXATTR]) {
+                gil = glupy_enter ();
+                rc = ((fop_fsetxattr_cbk_t)(conf->cbks[GLUPY_FSETXATTR])) (
+                        frame, cookie, this, op_ret, op_errno,
+                        xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * Entry point for FSETXATTR.  With a handler registered in
+ * fops[GLUPY_FSETXATTR], tag frame->local with the next value of this
+ * function's private counter, call the handler under the Python GIL and
+ * return its result; otherwise wind to the first child and return 0.
+ */
+int32_t
+glupy_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 dict_t *dict, int32_t flags, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_FSETXATTR]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_fsetxattr_t)(conf->fops[GLUPY_FSETXATTR])) (
+                        frame, this, fd, dict, flags, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_fsetxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsetxattr, fd, dict,
+                            flags, xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind FSETXATTR with glupy_fsetxattr_cbk as the reply handler.  A NULL xl,
+ * or xl equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_fsetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+                dict_t *dict, int32_t flags, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_fsetxattr_cbk, xl, xl->fops->fsetxattr,
+                    fd, dict, flags, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind FSETXATTR with the supplied results.  cookie
+ * and this are not used.
+ */
+void
+unwind_fsetxattr (call_frame_t *frame, long cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+}
+
+/* Record fop in the GLUPY_FSETXATTR slot of the fops table of the xlator
+ * whose address is py_this. */
+void
+set_fsetxattr_fop (long py_this, fop_fsetxattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->fops[GLUPY_FSETXATTR] = (long)fop;
+}
+
+/* Record cbk in the GLUPY_FSETXATTR slot of the cbks table of the xlator
+ * whose address is py_this. */
+void
+set_fsetxattr_cbk (long py_this, fop_fsetxattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *conf = xl->private;
+
+        conf->cbks[GLUPY_FSETXATTR] = (long)cbk;
+}
+
+/* FOP: FGETXATTR */
+
+/*
+ * Reply handler for FGETXATTR.  With a callback registered in
+ * cbks[GLUPY_FGETXATTR], call it under the Python GIL and return its result;
+ * otherwise clear frame->local, unwind the reply as received and return 0.
+ */
+int32_t
+glupy_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *dict,
+                     dict_t *xdata)
+{
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc;
+
+        if (conf->cbks[GLUPY_FGETXATTR]) {
+                gil = glupy_enter ();
+                rc = ((fop_fgetxattr_cbk_t)(conf->cbks[GLUPY_FGETXATTR])) (
+                        frame, cookie, this, op_ret, op_errno, dict,
+                        xdata);
+                glupy_leave (gil);
+                return rc;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Entry point for FGETXATTR.  With a handler registered in
+ * fops[GLUPY_FGETXATTR], tag frame->local with the next value of this
+ * function's private counter, call the handler under the Python GIL and
+ * return its result; otherwise wind to the first child and return 0.
+ */
+int32_t
+glupy_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *conf = this->private;
+        PyGILState_STATE  gil;
+        int32_t           rc = 0;
+
+        if (conf->fops[GLUPY_FGETXATTR]) {
+                gil = glupy_enter ();
+                next_id += 1;
+                frame->local = (void *)next_id;
+                rc = ((fop_fgetxattr_t)(conf->fops[GLUPY_FGETXATTR])) (
+                        frame, this, fd, name, xdata);
+                glupy_leave (gil);
+        } else {
+                STACK_WIND (frame, glupy_fgetxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fgetxattr, fd, name,
+                            xdata);
+        }
+
+        return rc;
+}
+
+/*
+ * Wind an fgetxattr call from `frame` to `xl`, installing
+ * glupy_fgetxattr_cbk as the callback.  A NULL `xl`, or one equal to
+ * the current translator (THIS), is replaced by the first child.
+ */
+void
+wind_fgetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+                const char *name, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_fgetxattr_cbk, xl, xl->fops->fgetxattr,
+                    fd, name, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind the fgetxattr reply.  `cookie` and
+ * `this` are accepted but not referenced directly in this body.
+ */
+void
+unwind_fgetxattr (call_frame_t *frame, long cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *dict,
+                  dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict,
+                             xdata);
+}
+
+
+/*
+ * Store `fop` as the fgetxattr dispatch function in the private data of
+ * the translator whose address is passed as the integer `py_this`.
+ */
+void
+set_fgetxattr_fop (long py_this, fop_fgetxattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_FGETXATTR] = (long)fop;
+}
+
+
+/*
+ * Store `cbk` as the fgetxattr callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_fgetxattr_cbk (long py_this, fop_fgetxattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_FGETXATTR] = (long)cbk;
+}
+
+/* FOP: REMOVEXATTR */
+
+/*
+ * removexattr callback.  When a callback has been stored in
+ * priv->cbks[GLUPY_REMOVEXATTR] it is called between glupy_enter() and
+ * glupy_leave() and its result is returned.  Otherwise frame->local is
+ * cleared, the reply is unwound unchanged and 0 is returned.
+ */
+int32_t
+glupy_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (priv->cbks[GLUPY_REMOVEXATTR]) {
+                gstate = glupy_enter();
+                ret = ((fop_removexattr_cbk_t)(priv->cbks[GLUPY_REMOVEXATTR]))(
+                        frame, cookie, this, op_ret, op_errno, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * removexattr entry point.  Without a stored dispatch function the call
+ * is wound to the first child and 0 is returned.  Otherwise frame->local
+ * is tagged with the next value of a per-function counter and the
+ * stored function is called between glupy_enter() and glupy_leave();
+ * its result is returned.
+ */
+int32_t
+glupy_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   const char *name, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (!priv->fops[GLUPY_REMOVEXATTR]) {
+                STACK_WIND (frame, glupy_removexattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->removexattr, loc, name,
+                            xdata);
+                return 0;
+        }
+
+        gstate = glupy_enter();
+        frame->local = (void *)++next_id;
+        ret = ((fop_removexattr_t)(priv->fops[GLUPY_REMOVEXATTR]))(
+                frame, this, loc, name, xdata);
+        glupy_leave(gstate);
+
+        return ret;
+}
+
+/*
+ * Wind a removexattr call from `frame` to `xl`, installing
+ * glupy_removexattr_cbk as the callback.  A NULL `xl`, or one equal to
+ * the current translator (THIS), is replaced by the first child.
+ */
+void
+wind_removexattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+                  const char *name, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_removexattr_cbk, xl, xl->fops->removexattr,
+                    loc, name, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind the removexattr reply.  `cookie` and
+ * `this` are accepted but not referenced directly in this body.
+ */
+void
+unwind_removexattr (call_frame_t *frame, long cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+}
+
+/*
+ * Store `fop` as the removexattr dispatch function in the private data
+ * of the translator whose address is passed as the integer `py_this`.
+ */
+void
+set_removexattr_fop (long py_this, fop_removexattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_REMOVEXATTR] = (long)fop;
+}
+
+/*
+ * Store `cbk` as the removexattr callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_removexattr_cbk (long py_this, fop_removexattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_REMOVEXATTR] = (long)cbk;
+}
+
+
+/* FOP: FREMOVEXATTR */
+
+/*
+ * fremovexattr callback.  When a callback has been stored in
+ * priv->cbks[GLUPY_FREMOVEXATTR] it is called between glupy_enter() and
+ * glupy_leave() and its result is returned.  Otherwise frame->local is
+ * cleared, the reply is unwound unchanged and 0 is returned.
+ */
+int32_t
+glupy_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (priv->cbks[GLUPY_FREMOVEXATTR]) {
+                gstate = glupy_enter();
+                ret = ((fop_fremovexattr_cbk_t)(priv->cbks[GLUPY_FREMOVEXATTR]))(
+                        frame, cookie, this, op_ret, op_errno, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * fremovexattr entry point.  Without a stored dispatch function the
+ * call is wound to the first child and 0 is returned.  Otherwise
+ * frame->local is tagged with the next value of a per-function counter
+ * and the stored function is called between glupy_enter() and
+ * glupy_leave(); its result is returned.
+ */
+int32_t
+glupy_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    const char *name, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (!priv->fops[GLUPY_FREMOVEXATTR]) {
+                STACK_WIND (frame, glupy_fremovexattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+                            xdata);
+                return 0;
+        }
+
+        gstate = glupy_enter();
+        frame->local = (void *)++next_id;
+        ret = ((fop_fremovexattr_t)(priv->fops[GLUPY_FREMOVEXATTR]))(
+                frame, this, fd, name, xdata);
+        glupy_leave(gstate);
+
+        return ret;
+}
+
+/*
+ * Wind an fremovexattr call from `frame` to `xl`, installing
+ * glupy_fremovexattr_cbk as the callback.  A NULL `xl`, or one equal to
+ * the current translator (THIS), is replaced by the first child.
+ */
+void
+wind_fremovexattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+                   const char *name, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_fremovexattr_cbk, xl, xl->fops->fremovexattr,
+                    fd, name, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind the fremovexattr reply.  `cookie` and
+ * `this` are accepted but not referenced directly in this body.
+ */
+void
+unwind_fremovexattr (call_frame_t *frame, long cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+}
+
+/*
+ * Store `fop` as the fremovexattr dispatch function in the private data
+ * of the translator whose address is passed as the integer `py_this`.
+ */
+void
+set_fremovexattr_fop (long py_this, fop_fremovexattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_FREMOVEXATTR] = (long)fop;
+}
+
+/*
+ * Store `cbk` as the fremovexattr callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_fremovexattr_cbk (long py_this, fop_fremovexattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_FREMOVEXATTR] = (long)cbk;
+}
+
+
+/* FOP: LINK */
+/*
+ * link callback.  When a callback has been stored in
+ * priv->cbks[GLUPY_LINK] it is called between glupy_enter() and
+ * glupy_leave() and its result is returned.  Otherwise frame->local is
+ * cleared, the reply is unwound unchanged and 0 is returned.
+ */
+int32_t
+glupy_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, inode_t *inode,
+                struct iatt *buf, struct iatt *preparent,
+                struct iatt *postparent, dict_t *xdata)
+{
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (priv->cbks[GLUPY_LINK]) {
+                gstate = glupy_enter();
+                ret = ((fop_link_cbk_t)(priv->cbks[GLUPY_LINK]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        inode, buf, preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * link entry point.  Without a stored dispatch function the call is
+ * wound to the first child and 0 is returned.  Otherwise frame->local is
+ * tagged with the next value of a per-function counter and the stored
+ * function is called between glupy_enter() and glupy_leave(); its
+ * result is returned.
+ */
+int32_t
+glupy_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+           dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (!priv->fops[GLUPY_LINK]) {
+                STACK_WIND (frame, glupy_link_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->link, oldloc, newloc,
+                            xdata);
+                return 0;
+        }
+
+        gstate = glupy_enter();
+        frame->local = (void *)++next_id;
+        ret = ((fop_link_t)(priv->fops[GLUPY_LINK]))(
+                frame, this, oldloc, newloc, xdata);
+        glupy_leave(gstate);
+
+        return ret;
+}
+
+/*
+ * Wind a link call from `frame` to `xl`, installing glupy_link_cbk as
+ * the callback.  A NULL `xl`, or one equal to the current translator
+ * (THIS), is replaced by the first child.
+ */
+void
+wind_link (call_frame_t *frame, xlator_t *xl, loc_t *oldloc, loc_t *newloc,
+           dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_link_cbk, xl, xl->fops->link,
+                    oldloc, newloc, xdata);
+}
+
+/*
+ * Clear frame->local and unwind the link reply.  `cookie` and `this`
+ * are accepted but not referenced directly in this body.
+ */
+void
+unwind_link (call_frame_t *frame, long cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, inode_t *inode,
+             struct iatt *buf, struct iatt *preparent,
+             struct iatt *postparent, dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+}
+
+/*
+ * Store `fop` as the link dispatch function in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_link_fop (long py_this, fop_link_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_LINK] = (long)fop;
+}
+
+/*
+ * Store `cbk` as the link callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_link_cbk (long py_this, fop_link_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_LINK] = (long)cbk;
+}
+
+/* FOP: SYMLINK */
+/*
+ * symlink callback.  When a callback has been stored in
+ * priv->cbks[GLUPY_SYMLINK] it is called between glupy_enter() and
+ * glupy_leave() and its result is returned.  Otherwise frame->local is
+ * cleared, the reply is unwound unchanged and 0 is returned.
+ */
+int32_t
+glupy_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, inode_t *inode,
+                   struct iatt *buf, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (priv->cbks[GLUPY_SYMLINK]) {
+                gstate = glupy_enter();
+                ret = ((fop_symlink_cbk_t)(priv->cbks[GLUPY_SYMLINK]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        inode, buf, preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * symlink entry point.  Without a stored dispatch function the call is
+ * wound to the first child and 0 is returned.  Otherwise frame->local is
+ * tagged with the next value of a per-function counter and the stored
+ * function is called between glupy_enter() and glupy_leave(); its
+ * result is returned.
+ */
+int32_t
+glupy_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+              loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (!priv->fops[GLUPY_SYMLINK]) {
+                STACK_WIND (frame, glupy_symlink_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->symlink, linkname, loc,
+                            umask, xdata);
+                return 0;
+        }
+
+        gstate = glupy_enter();
+        frame->local = (void *)++next_id;
+        ret = ((fop_symlink_t)(priv->fops[GLUPY_SYMLINK]))(
+                frame, this, linkname, loc, umask, xdata);
+        glupy_leave(gstate);
+
+        return ret;
+}
+
+/*
+ * Wind a symlink call from `frame` to `xl`, installing
+ * glupy_symlink_cbk as the callback.  A NULL `xl`, or one equal to the
+ * current translator (THIS), is replaced by the first child.
+ */
+void
+wind_symlink (call_frame_t *frame, xlator_t *xl, const char *linkname,
+              loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_symlink_cbk, xl, xl->fops->symlink,
+                    linkname, loc, umask, xdata);
+}
+
+/*
+ * Clear frame->local and unwind the symlink reply.  `cookie` and `this`
+ * are accepted but not referenced directly in this body.
+ */
+void
+unwind_symlink (call_frame_t *frame, long cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, inode_t *inode,
+                struct iatt *buf, struct iatt *preparent,
+                struct iatt *postparent, dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+}
+
+/*
+ * Store `fop` as the symlink dispatch function in the private data of
+ * the translator whose address is passed as the integer `py_this`.
+ */
+void
+set_symlink_fop (long py_this, fop_symlink_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_SYMLINK] = (long)fop;
+}
+
+/*
+ * Store `cbk` as the symlink callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_symlink_cbk (long py_this, fop_symlink_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_SYMLINK] = (long)cbk;
+}
+
+
+/* FOP: READLINK */
+/*
+ * readlink callback.  When a callback has been stored in
+ * priv->cbks[GLUPY_READLINK] it is called between glupy_enter() and
+ * glupy_leave() and its result is returned.  Otherwise frame->local is
+ * cleared, the reply is unwound unchanged and 0 is returned.
+ */
+int32_t
+glupy_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, const char *path,
+                    struct iatt *buf, dict_t *xdata)
+{
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (priv->cbks[GLUPY_READLINK]) {
+                gstate = glupy_enter();
+                ret = ((fop_readlink_cbk_t)(priv->cbks[GLUPY_READLINK]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        path, buf, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path,
+                             buf, xdata);
+        return 0;
+}
+
+/*
+ * readlink entry point.  Without a stored dispatch function the call is
+ * wound to the first child and 0 is returned.  Otherwise frame->local is
+ * tagged with the next value of a per-function counter and the stored
+ * function is called between glupy_enter() and glupy_leave(); its
+ * result is returned.
+ */
+int32_t
+glupy_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                size_t size, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (!priv->fops[GLUPY_READLINK]) {
+                STACK_WIND (frame, glupy_readlink_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readlink, loc,
+                            size, xdata);
+                return 0;
+        }
+
+        gstate = glupy_enter();
+        frame->local = (void *)++next_id;
+        ret = ((fop_readlink_t)(priv->fops[GLUPY_READLINK]))(
+                frame, this, loc, size, xdata);
+        glupy_leave(gstate);
+
+        return ret;
+}
+
+/*
+ * Wind a readlink call from `frame` to `xl`, installing
+ * glupy_readlink_cbk as the callback.  A NULL `xl`, or one equal to the
+ * current translator (THIS), is replaced by the first child.
+ */
+void
+wind_readlink (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+               size_t size, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_readlink_cbk, xl, xl->fops->readlink,
+                    loc, size, xdata);
+}
+
+/*
+ * Clear frame->local and unwind the readlink reply.  `cookie` and
+ * `this` are accepted but not referenced directly in this body.
+ */
+void
+unwind_readlink (call_frame_t *frame, long cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, const char *path,
+                 struct iatt *buf, dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf,
+                             xdata);
+}
+
+/*
+ * Store `fop` as the readlink dispatch function in the private data of
+ * the translator whose address is passed as the integer `py_this`.
+ */
+void
+set_readlink_fop (long py_this, fop_readlink_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_READLINK] = (long)fop;
+}
+
+/*
+ * Store `cbk` as the readlink callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_readlink_cbk (long py_this, fop_readlink_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_READLINK] = (long)cbk;
+}
+
+
+/* FOP: UNLINK */
+
+/*
+ * unlink callback.  When a callback has been stored in
+ * priv->cbks[GLUPY_UNLINK] it is called between glupy_enter() and
+ * glupy_leave() and its result is returned.  Otherwise frame->local is
+ * cleared, the reply is unwound unchanged and 0 is returned.
+ */
+int32_t
+glupy_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  struct iatt *preparent, struct iatt *postparent,
+                  dict_t *xdata)
+{
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (priv->cbks[GLUPY_UNLINK]) {
+                gstate = glupy_enter();
+                ret = ((fop_unlink_cbk_t)(priv->cbks[GLUPY_UNLINK]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+                             postparent, xdata);
+        return 0;
+}
+
+/*
+ * unlink entry point.  Without a stored dispatch function the call is
+ * wound to the first child and 0 is returned.  Otherwise frame->local is
+ * tagged with the next value of a per-function counter and the stored
+ * function is called between glupy_enter() and glupy_leave(); its
+ * result is returned.
+ */
+int32_t
+glupy_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              int xflags, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (!priv->fops[GLUPY_UNLINK]) {
+                STACK_WIND (frame, glupy_unlink_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->unlink, loc,
+                            xflags, xdata);
+                return 0;
+        }
+
+        gstate = glupy_enter();
+        frame->local = (void *)++next_id;
+        ret = ((fop_unlink_t)(priv->fops[GLUPY_UNLINK]))(
+                frame, this, loc, xflags, xdata);
+        glupy_leave(gstate);
+
+        return ret;
+}
+
+/*
+ * Wind an unlink call from `frame` to `xl`, installing glupy_unlink_cbk
+ * as the callback.  A NULL `xl`, or one equal to the current translator
+ * (THIS), is replaced by the first child.
+ */
+void
+wind_unlink (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+             int xflags, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_unlink_cbk, xl, xl->fops->unlink,
+                    loc, xflags, xdata);
+}
+
+/*
+ * Clear frame->local and unwind the unlink reply.  `cookie` and `this`
+ * are accepted but not referenced directly in this body.
+ */
+void
+unwind_unlink (call_frame_t *frame, long cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               struct iatt *preparent, struct iatt *postparent,
+               dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+                             preparent, postparent, xdata);
+}
+
+/*
+ * Store `fop` as the unlink dispatch function in the private data of
+ * the translator whose address is passed as the integer `py_this`.
+ */
+void
+set_unlink_fop (long py_this, fop_unlink_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_UNLINK] = (long)fop;
+}
+
+/*
+ * Store `cbk` as the unlink callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_unlink_cbk (long py_this, fop_unlink_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_UNLINK] = (long)cbk;
+}
+
+
+/* FOP: MKDIR */
+
+/*
+ * mkdir callback.  When a callback has been stored in
+ * priv->cbks[GLUPY_MKDIR] it is called between glupy_enter() and
+ * glupy_leave() and its result is returned.  Otherwise frame->local is
+ * cleared, the reply is unwound unchanged and 0 is returned.
+ */
+int32_t
+glupy_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+                 struct iatt *buf, struct iatt *preparent,
+                 struct iatt *postparent, dict_t *xdata)
+{
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (priv->cbks[GLUPY_MKDIR]) {
+                gstate = glupy_enter();
+                ret = ((fop_mkdir_cbk_t)(priv->cbks[GLUPY_MKDIR]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        inode, buf, preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * mkdir entry point.  Without a stored dispatch function the call is
+ * wound to the first child and 0 is returned.  Otherwise frame->local is
+ * tagged with the next value of a per-function counter and the stored
+ * function is called between glupy_enter() and glupy_leave(); its
+ * result is returned.
+ */
+int32_t
+glupy_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+             mode_t umask, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (!priv->fops[GLUPY_MKDIR]) {
+                STACK_WIND (frame, glupy_mkdir_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+                            xdata);
+                return 0;
+        }
+
+        gstate = glupy_enter();
+        frame->local = (void *)++next_id;
+        ret = ((fop_mkdir_t)(priv->fops[GLUPY_MKDIR]))(
+                frame, this, loc, mode, umask, xdata);
+        glupy_leave(gstate);
+
+        return ret;
+}
+
+/*
+ * Wind a mkdir call from `frame` to `xl`, installing glupy_mkdir_cbk as
+ * the callback.  A NULL `xl`, or one equal to the current translator
+ * (THIS), is replaced by the first child.
+ */
+void
+wind_mkdir (call_frame_t *frame, xlator_t *xl, loc_t *loc, mode_t mode,
+            mode_t umask, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_mkdir_cbk, xl, xl->fops->mkdir,
+                    loc, mode, umask, xdata);
+}
+
+/*
+ * Clear frame->local and unwind the mkdir reply.  `cookie` and `this`
+ * are accepted but not referenced directly in this body.
+ */
+void
+unwind_mkdir (call_frame_t *frame, long cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, inode_t *inode,
+              struct iatt *buf, struct iatt *preparent,
+              struct iatt *postparent, dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+}
+
+/*
+ * Store `fop` as the mkdir dispatch function in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_mkdir_fop (long py_this, fop_mkdir_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_MKDIR] = (long)fop;
+}
+
+/*
+ * Store `cbk` as the mkdir callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_mkdir_cbk (long py_this, fop_mkdir_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_MKDIR] = (long)cbk;
+}
+
+
+/* FOP: RMDIR */
+
+/*
+ * rmdir callback.  When a callback has been stored in
+ * priv->cbks[GLUPY_RMDIR] it is called between glupy_enter() and
+ * glupy_leave() and its result is returned.  Otherwise frame->local is
+ * cleared, the reply is unwound unchanged and 0 is returned.
+ */
+int32_t
+glupy_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 struct iatt *preparent, struct iatt *postparent,
+                 dict_t *xdata)
+{
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (priv->cbks[GLUPY_RMDIR]) {
+                gstate = glupy_enter();
+                ret = ((fop_rmdir_cbk_t)(priv->cbks[GLUPY_RMDIR]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+                             postparent, xdata);
+        return 0;
+}
+
+/*
+ * rmdir entry point.  Without a stored dispatch function the call is
+ * wound to the first child and 0 is returned.  Otherwise frame->local is
+ * tagged with the next value of a per-function counter and the stored
+ * function is called between glupy_enter() and glupy_leave(); its
+ * result is returned.
+ */
+int32_t
+glupy_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             int xflags, dict_t *xdata)
+{
+        static long       next_id = 0;
+        glupy_private_t  *priv = this->private;
+        PyGILState_STATE  gstate;
+        int32_t           ret;
+
+        if (!priv->fops[GLUPY_RMDIR]) {
+                STACK_WIND (frame, glupy_rmdir_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->rmdir, loc,
+                            xflags, xdata);
+                return 0;
+        }
+
+        gstate = glupy_enter();
+        frame->local = (void *)++next_id;
+        ret = ((fop_rmdir_t)(priv->fops[GLUPY_RMDIR]))(
+                frame, this, loc, xflags, xdata);
+        glupy_leave(gstate);
+
+        return ret;
+}
+
+/*
+ * Wind an rmdir call from `frame` to `xl`, installing glupy_rmdir_cbk
+ * as the callback.  A NULL `xl`, or one equal to the current translator
+ * (THIS), is replaced by the first child.
+ */
+void
+wind_rmdir (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+            int xflags, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        xl = (xl && (xl != this)) ? xl : FIRST_CHILD(this);
+
+        STACK_WIND (frame, glupy_rmdir_cbk, xl, xl->fops->rmdir,
+                    loc, xflags, xdata);
+}
+
+/*
+ * Clear frame->local and unwind the rmdir reply.  `cookie` and `this`
+ * are accepted but not referenced directly in this body.
+ */
+void
+unwind_rmdir (call_frame_t *frame, long cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno,
+              struct iatt *preparent, struct iatt *postparent,
+              dict_t *xdata)
+{
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+                             preparent, postparent, xdata);
+}
+
+/*
+ * Store `fop` as the rmdir dispatch function in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_rmdir_fop (long py_this, fop_rmdir_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_RMDIR] = (long)fop;
+}
+
+/*
+ * Store `cbk` as the rmdir callback in the private data of the
+ * translator whose address is passed as the integer `py_this`.
+ */
+void
+set_rmdir_cbk (long py_this, fop_rmdir_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_RMDIR] = (long)cbk;
+}
+
+
+/* NON-FOP-SPECIFIC CODE */
+
+
+/*
+ * Return frame->local reinterpreted as a long.  The glupy_<fop> entry
+ * points store a per-function counter value there before dispatching.
+ */
+long
+get_id (call_frame_t *frame)
+{
+        void *tag = frame->local;
+
+        return (long)(tag);
+}
+
+/* Return the `unique` field of the call stack root that owns `frame`. */
+uint64_t
+get_rootunique (call_frame_t *frame)
+{
+        uint64_t unique = frame->root->unique;
+
+        return unique;
+}
+
+/*
+ * Translator entry point: import the Python module named by the
+ * "module-name" option and call its `xlator` attribute with the address
+ * of `this` (as a Python long) as the only argument.
+ *
+ * On success the imported module and the object returned by the call
+ * are kept in this->private and 0 is returned.  On failure everything
+ * acquired so far is released, this->private is reset to NULL so that
+ * no dangling pointer is left behind, and -1 is returned.
+ *
+ * NOTE(review): as in the original code, the Python C-API calls below
+ * run after PyEval_ReleaseLock() without re-acquiring the GIL --
+ * confirm whether they should be bracketed by glupy_enter()/glupy_leave().
+ */
+int32_t
+init (xlator_t *this)
+{
+        glupy_private_t     *priv         = NULL;
+        char                *module_name  = NULL;
+        PyObject            *py_mod_name  = NULL;
+        PyObject            *py_init_func = NULL;
+        PyObject            *py_args      = NULL;
+        PyObject            *py_this      = NULL;
+        PyObject            *syspath      = NULL;
+        PyObject            *path         = NULL;
+        static gf_boolean_t  py_inited    = _gf_false;
+
+        if (dict_get_str(this->options,"module-name",&module_name) != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "missing module-name");
+                return -1;
+        }
+
+        priv = GF_CALLOC (1, sizeof (glupy_private_t), gf_glupy_mt_priv);
+        if (!priv) {
+                return -1;
+        }
+        this->private = priv;
+
+        /* The interpreter is started once per process, not per xlator. */
+        if (!py_inited) {
+                Py_Initialize();
+                PyEval_InitThreads();
+#if 0
+                (void)pthread_key_create(&gil_init_key,NULL);
+                (void)pthread_setspecific(gil_init_key,(void *)1);
+#endif
+                /* PyEval_InitThreads takes this "for" us.  No thanks. */
+                PyEval_ReleaseLock();
+                py_inited = _gf_true;
+        }
+
+        /*
+         * Adjust python's path.  PySys_GetObject returns a borrowed
+         * reference, so only `path` is released here.  A failure is
+         * logged but not fatal: the import below reports the real error.
+         */
+        syspath = PySys_GetObject("path");
+        path = PyString_FromString(GLUSTER_PYTHON_PATH);
+        if (!syspath || !path || (PyList_Append(syspath, path) != 0)) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "could not extend python path");
+                if (PyErr_Occurred()) {
+                        PyErr_Print();
+                }
+        }
+        Py_XDECREF(path);
+
+        py_mod_name = PyString_FromString(module_name);
+        if (!py_mod_name) {
+                gf_log (this->name, GF_LOG_ERROR, "could not create name");
+                if (PyErr_Occurred()) {
+                        PyErr_Print();
+                }
+                goto err_free_priv;
+        }
+
+        /* Purely informational, so it does not belong at ERROR level. */
+        gf_log (this->name, GF_LOG_DEBUG, "py_mod_name = %s", module_name);
+        priv->py_module = PyImport_Import(py_mod_name);
+        Py_DECREF(py_mod_name);
+        if (!priv->py_module) {
+                gf_log (this->name, GF_LOG_ERROR, "Python import failed");
+                if (PyErr_Occurred()) {
+                        PyErr_Print();
+                }
+                goto err_free_priv;
+        }
+
+        py_init_func = PyObject_GetAttrString(priv->py_module, "xlator");
+        if (!py_init_func || !PyCallable_Check(py_init_func)) {
+                gf_log (this->name, GF_LOG_ERROR, "missing init func");
+                if (PyErr_Occurred()) {
+                        PyErr_Print();
+                }
+                /* err_deref_init also drops a non-callable attribute. */
+                goto err_deref_init;
+        }
+
+        py_args = PyTuple_New(1);
+        if (py_args) {
+                py_this = PyLong_FromLong((long)this);
+        }
+        if (!py_args || !py_this) {
+                gf_log (this->name, GF_LOG_ERROR, "could not create args");
+                if (PyErr_Occurred()) {
+                        PyErr_Print();
+                }
+                Py_XDECREF(py_args);
+                goto err_deref_init;
+        }
+        /* PyTuple_SetItem steals the reference to py_this. */
+        PyTuple_SetItem(py_args,0,py_this);
+
+        /* TBD: pass in list of children */
+        priv->py_xlator = PyObject_CallObject(py_init_func, py_args);
+        Py_DECREF(py_args);
+        if (!priv->py_xlator) {
+                gf_log (this->name, GF_LOG_ERROR, "Python init failed");
+                if (PyErr_Occurred()) {
+                        PyErr_Print();
+                }
+                goto err_deref_init;
+        }
+        gf_log (this->name, GF_LOG_INFO, "init returned %p", priv->py_xlator);
+
+        /* The callable is not needed once it has run; drop our reference. */
+        Py_DECREF(py_init_func);
+
+        return 0;
+
+err_deref_init:
+        Py_XDECREF(py_init_func);
+        Py_DECREF(priv->py_module);
+err_free_priv:
+        /* Do not leave a dangling pointer for fini() to trip over. */
+        this->private = NULL;
+        GF_FREE(priv);
+        return -1;
+}
+
+/*
+ * Translator teardown: if private data exists, drop the references to
+ * the Python xlator object and module, detach the private data from
+ * `this`, and free it.  Does nothing when this->private is NULL.
+ */
+void
+fini (xlator_t *this)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv != NULL) {
+                Py_DECREF(priv->py_xlator);
+                Py_DECREF(priv->py_module);
+                this->private = NULL;
+                GF_FREE (priv);
+        }
+}
+
+/*
+ * Fop dispatch table: each supported operation points at its
+ * glupy_<fop> entry point.  Designated initializers are used, so the
+ * order of the entries below carries no meaning.
+ */
+struct xlator_fops fops = {
+        /* namespace operations */
+        .lookup       = glupy_lookup,
+        .create       = glupy_create,
+        .link         = glupy_link,
+        .unlink       = glupy_unlink,
+        .symlink      = glupy_symlink,
+        .readlink     = glupy_readlink,
+        .mkdir        = glupy_mkdir,
+        .rmdir        = glupy_rmdir,
+        /* file data */
+        .open         = glupy_open,
+        .readv        = glupy_readv,
+        .writev       = glupy_writev,
+        /* directory reads */
+        .opendir      = glupy_opendir,
+        .readdir      = glupy_readdir,
+        .readdirp     = glupy_readdirp,
+        /* attributes */
+        .stat         = glupy_stat,
+        .fstat        = glupy_fstat,
+        .statfs       = glupy_statfs,
+        /* extended attributes */
+        .setxattr     = glupy_setxattr,
+        .getxattr     = glupy_getxattr,
+        .fsetxattr    = glupy_fsetxattr,
+        .fgetxattr    = glupy_fgetxattr,
+        .removexattr  = glupy_removexattr,
+        .fremovexattr = glupy_fremovexattr
+};
+
+/* Callback table; no callbacks are set by this translator. */
+struct xlator_cbks cbks = {
+};
+
+/*
+ * Option table.  It holds only the NULL-key entry, so no option is
+ * described here; init() reads "module-name" straight from
+ * this->options.
+ */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/glupy/src/glupy.h b/xlators/features/glupy/src/glupy.h
new file mode 100644
index 000000000..8661fce88
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.h
@@ -0,0 +1,69 @@
+/*
+ Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __GLUPY_H__
+#define __GLUPY_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include "mem-types.h"
+
+/*
+ * One index per operation, used to address the fops[] and cbks[] arrays
+ * of glupy_private_t.  Values are assigned by position starting at 0,
+ * so GLUPY_N_FUNCS must stay last: it is the size of both arrays.
+ */
+enum {
+ GLUPY_LOOKUP = 0,
+ GLUPY_CREATE,
+ GLUPY_OPEN,
+ GLUPY_READV,
+ GLUPY_WRITEV,
+ GLUPY_OPENDIR,
+ GLUPY_READDIR,
+ GLUPY_READDIRP,
+ GLUPY_STAT,
+ GLUPY_FSTAT,
+ GLUPY_STATFS,
+ GLUPY_SETXATTR,
+ GLUPY_GETXATTR,
+ GLUPY_FSETXATTR,
+ GLUPY_FGETXATTR,
+ GLUPY_REMOVEXATTR,
+ GLUPY_FREMOVEXATTR,
+ GLUPY_LINK,
+ GLUPY_UNLINK,
+ GLUPY_READLINK,
+ GLUPY_SYMLINK,
+ GLUPY_MKNOD,
+ GLUPY_MKDIR,
+ GLUPY_RMDIR,
+ GLUPY_N_FUNCS
+};
+
+/*
+ * Per-translator private data.
+ *   py_module  imported Python module (set by init())
+ *   py_xlator  object returned by the module's `xlator` callable
+ *   fops       dispatch functions, stored as function pointers cast to
+ *              long and indexed by the GLUPY_* enum; 0 means "not set"
+ *   cbks       callbacks, stored and indexed the same way
+ */
+typedef struct {
+ PyObject *py_module;
+ PyObject *py_xlator;
+ long fops[GLUPY_N_FUNCS];
+ long cbks[GLUPY_N_FUNCS];
+} glupy_private_t;
+
+/*
+ * Memory-accounting type ids for this translator, numbered after
+ * gf_common_mt_end.  gf_glupy_mt_priv tags the glupy_private_t
+ * allocation made in init().
+ */
+enum gf_glupy_mem_types_ {
+ gf_glupy_mt_priv = gf_common_mt_end + 1,
+ gf_glupy_mt_end
+};
+
+#endif /* __GLUPY_H__ */
diff --git a/xlators/features/glupy/src/gluster.py b/xlators/features/glupy/src/gluster.py
new file mode 100644
index 000000000..a5daa77d3
--- /dev/null
+++ b/xlators/features/glupy/src/gluster.py
@@ -0,0 +1,841 @@
+import sys
+from ctypes import *
+
+# Library handle opened with an empty name and RTLD_GLOBAL; the set_*,
+# wind_* and unwind_* C helpers are looked up through it below.
+# NOTE(review): presumably the empty name resolves to the running process -- confirm.
+dl = CDLL("",RTLD_GLOBAL)
+
+
+# The six classes below declare no fields: they are opaque placeholders
+# whose only use in this file is as POINTER(...) targets in signatures.
+class call_frame_t (Structure):
+ pass
+
+class dev_t (Structure):
+ pass
+
+
+class dict_t (Structure):
+ pass
+
+
+class gf_dirent_t (Structure):
+ pass
+
+
+class iobref_t (Structure):
+ pass
+
+
+class iovec_t (Structure):
+ pass
+
+
+# Doubly linked list node.  _fields_ is assigned after the class
+# statement because both fields are pointers back to list_head itself.
+class list_head (Structure):
+ pass
+
+list_head._fields_ = [
+ ("next", POINTER(list_head)),
+ ("prev", POINTER(list_head))
+ ]
+
+
+# Permission triple: three 1-bit fields packed into a c_uint8.
+class rwxperm_t (Structure):
+ _fields_ = [
+ ("read", c_uint8, 1),
+ ("write", c_uint8, 1),
+ ("execn", c_uint8, 1)
+ ]
+
+
+# Opaque placeholder (no fields); used as POINTER(statvfs_t) in OpStatfs.
+class statvfs_t (Structure):
+ pass
+
+
+# Opaque placeholder (no fields); only ever used through POINTER(xlator_t).
+class xlator_t (Structure):
+ pass
+
+
+# Protection bits: three 1-bit flags followed by one rwxperm_t each for
+# owner, group and other.
+# NOTE(review): presumably mirrors the C ia_prot_t layout -- verify against the C header.
+class ia_prot_t (Structure):
+ _fields_ = [
+ ("suid", c_uint8, 1),
+ ("sgid", c_uint8, 1),
+ ("sticky", c_uint8, 1),
+ ("owner", rwxperm_t),
+ ("group", rwxperm_t),
+ ("other", rwxperm_t)
+ ]
+
+# File-type codes for checking ia_type; numbered 0..7 in the order
+# listed.  range() is used instead of xrange() so the unpacking works
+# on both Python 2 and Python 3.
+(IA_INVAL, IA_IFREG, IA_IFDIR, IA_IFLNK, IA_IFBLK, IA_IFCHR, IA_IFIFO,
+ IA_IFSOCK) = range(8)
+
+
+# File attribute record.  Field order and widths define the binary layout.
+# NOTE(review): presumably mirrors the C struct iatt -- verify against the C header.
+class iatt_t (Structure):
+ _fields_ = [
+ ("ia_no", c_uint64),
+ ("ia_gfid", c_ubyte * 16),
+ ("ia_dev", c_uint64),
+ ("ia_type", c_uint),
+ ("ia_prot", ia_prot_t),
+ ("ia_nlink", c_uint32),
+ ("ia_uid", c_uint32),
+ ("ia_gid", c_uint32),
+ ("ia_rdev", c_uint64),
+ ("ia_size", c_uint64),
+ ("ia_blksize", c_uint32),
+ ("ia_blocks", c_uint64),
+ ("ia_atime", c_uint32 ),
+ ("ia_atime_nsec", c_uint32),
+ ("ia_mtime", c_uint32),
+ ("ia_mtime_nsec", c_uint32),
+ ("ia_ctime", c_uint32),
+ ("ia_ctime_nsec", c_uint32)
+ ]
+
+
+# Memory pool descriptor.  The lock is declared as a bare c_void_p.
+# NOTE(review): presumably mirrors the C struct mem_pool; c_void_p only
+# matches if the C lock member is pointer-sized -- verify.
+class mem_pool (Structure):
+ _fields_ = [
+ ("list", list_head),
+ ("hot_count", c_int),
+ ("cold_count", c_int),
+ ("lock", c_void_p),
+ ("padded_sizeof_type", c_ulong),
+ ("pool", c_void_p),
+ ("pool_end", c_void_p),
+ ("real_sizeof_type", c_int),
+ ("alloc_count", c_uint64),
+ ("pool_misses", c_uint64),
+ ("max_alloc", c_int),
+ ("curr_stdalloc", c_int),
+ ("max_stdalloc", c_int),
+ ("name", c_char_p),
+ ("global_list", list_head)
+ ]
+
+
+# Union: the inode context key viewed as a 64-bit integer or as a
+# pointer to an xlator_t.
+class U_ctx_key_inode (Union):
+ _fields_ = [
+ ("key", c_uint64),
+ ("xl_key", POINTER(xlator_t))
+ ]
+
+
+# Union: first context value, as a 64-bit integer or an untyped pointer.
+class U_ctx_value1 (Union):
+ _fields_ = [
+ ("value1", c_uint64),
+ ("ptr1", c_void_p)
+ ]
+
+
+# Union: second context value, as a 64-bit integer or an untyped pointer.
+class U_ctx_value2 (Union):
+ _fields_ = [
+ ("value2", c_uint64),
+ ("ptr2", c_void_p)
+ ]
+
+# Inode context slot: a key union plus two value unions.  All three are
+# listed in _anonymous_, so their members (key, xl_key, value1, ptr1,
+# value2, ptr2) are reachable directly on an inode_ctx instance.
+class inode_ctx (Structure):
+ _anonymous_ = ("u_key","u_value1","u_value2",)
+ _fields_ = [
+ ("u_key", U_ctx_key_inode),
+ ("u_value1", U_ctx_value1),
+ ("u_value2", U_ctx_value2)
+ ]
+
+# Forward declaration: inode_table_t needs POINTER(inode_t) before
+# inode_t's fields can be listed; they are assigned further down.
+class inode_t (Structure):
+ pass
+
+# Inode table descriptor.  Field order and widths define the binary layout.
+# NOTE(review): presumably mirrors the C inode table struct; the lock is
+# declared as a bare c_void_p -- verify against the C header.
+class inode_table_t (Structure):
+ _fields_ = [
+ ("lock", c_void_p),
+ ("hashsize", c_size_t),
+ ("name", c_char_p),
+ ("root", POINTER(inode_t)),
+ ("xl", POINTER(xlator_t)),
+ ("lru_limit", c_uint32),
+ ("inode_hash", POINTER(list_head)),
+ ("name_hash", POINTER(list_head)),
+ ("active", list_head),
+ ("active_size", c_uint32),
+ ("lru", list_head),
+ ("lru_size", c_uint32),
+ ("purge", list_head),
+ ("purge_size", c_uint32),
+ ("inode_pool", POINTER(mem_pool)),
+ ("dentry_pool", POINTER(mem_pool)),
+ ("fd_mem_pool", POINTER(mem_pool))
+ ]
+
+# Field list for inode_t, assigned after inode_table_t exists because
+# the first field points to it.
+# NOTE(review): presumably mirrors the C inode struct -- verify against the C header.
+inode_t._fields_ = [
+ ("table", POINTER(inode_table_t)),
+ ("gfid", c_ubyte * 16),
+ ("lock", c_void_p),
+ ("nlookup", c_uint64),
+ ("fd_count", c_uint32),
+ ("ref", c_uint32),
+ ("ia_type", c_uint),
+ ("fd_list", list_head),
+ ("dentry_list", list_head),
+ ("hashv", list_head),
+ ("listv", list_head),
+ ("ctx", POINTER(inode_ctx))
+ ]
+
+
+
+# Union: the fd context key viewed as a 64-bit integer or as an untyped
+# pointer.
+class U_ctx_key_fd (Union):
+ _fields_ = [
+ ("key", c_uint64),
+ ("xl_key", c_void_p)
+ ]
+
+# Lock context attached to an fd: a list head, a reference count and a
+# lock declared as a bare c_void_p.
+class fd_lk_ctx (Structure):
+ _fields_ = [
+ ("lk_list", list_head),
+ ("ref", c_int),
+ ("lock", c_void_p)
+ ]
+
+# Fd context slot: a key union plus one value union.  Both are listed in
+# _anonymous_, so key, xl_key, value1 and ptr1 are reachable directly on
+# an fd_ctx instance.
+class fd_ctx (Structure):
+ _anonymous_ = ("u_key","u_value1")
+ _fields_ = [
+ ("u_key", U_ctx_key_fd),
+ ("u_value1", U_ctx_value1)
+ ]
+
+# Open file descriptor object.  Field order and widths define the binary layout.
+# NOTE(review): presumably mirrors the C fd struct -- verify against the C header.
+class fd_t (Structure):
+ _fields_ = [
+ ("pid", c_uint64),
+ ("flags", c_int32),
+ ("refcount", c_int32),
+ ("inode_list", list_head),
+ ("inode", POINTER(inode_t)),
+ ("lock", c_void_p),
+ ("ctx", POINTER(fd_ctx)),
+ ("xl_count", c_int),
+ ("lk_ctx", POINTER(fd_lk_ctx)),
+ ("anonymous", c_uint)
+ ]
+
+# Location of a file system object: path and name strings, pointers to
+# the inode and its parent inode, and two 16-byte gfid arrays.
+class loc_t (Structure):
+ _fields_ = [
+ ("path", c_char_p),
+ ("name", c_char_p),
+ ("inode", POINTER(inode_t)),
+ ("parent", POINTER(inode_t)),
+ ("gfid", c_ubyte * 16),
+ ("pargfid", c_ubyte * 16),
+ ]
+
+
+
+def _init_op (a_class, fop, cbk, wind, unwind):
+    """Derive ctypes prototypes for one operation and type its C helpers.
+
+    a_class must carry two tuples, fop_sig and cbk_sig, whose first
+    element is the return type and whose remaining elements are the
+    argument types.  The function:
+      * stores CFUNCTYPE prototypes on a_class as fop_type and cbk_type;
+      * declares `fop` and `cbk` (the set_*_fop / set_*_cbk registration
+        functions) as taking a c_long plus the matching prototype;
+      * declares `wind` and `unwind` as taking the signature's arguments,
+        i.e. everything after the return type.
+    All four C helpers are marked as returning nothing.
+    """
+    # Decorators, used by translators.  We could pass the signatures as
+    # parameters, but it's actually kind of nice to keep them around for
+    # inspection.  Argument unpacking replaces the deprecated apply().
+    a_class.fop_type = CFUNCTYPE(*a_class.fop_sig)
+    a_class.cbk_type = CFUNCTYPE(*a_class.cbk_sig)
+    # Dispatch-function registration.
+    fop.restype = None
+    fop.argtypes = [ c_long, a_class.fop_type ]
+    # Callback-function registration.
+    cbk.restype = None
+    cbk.argtypes = [ c_long, a_class.cbk_type ]
+    # STACK_WIND function.
+    wind.restype = None
+    wind.argtypes = list(a_class.fop_sig[1:])
+    # STACK_UNWIND function.
+    unwind.restype = None
+    unwind.argtypes = list(a_class.cbk_sig[1:])
+
+# ctypes signatures for the lookup operation.  In both tuples element 0
+# is the return type; the rest are the argument types of the dispatch
+# function (fop_sig) and of the callback (cbk_sig).
+class OpLookup:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(dict_t), POINTER(iatt_t))
+# Build the prototypes and type the four lookup helpers exported by dl.
+_init_op (OpLookup, dl.set_lookup_fop, dl.set_lookup_cbk,
+ dl.wind_lookup, dl.unwind_lookup)
+
+# ctypes signatures for the create operation (element 0 of each tuple
+# is the return type).
+class OpCreate:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, c_uint, c_uint, POINTER(fd_t),
+ POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(inode_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+# Build the prototypes and type the four create helpers exported by dl.
+_init_op (OpCreate, dl.set_create_fop, dl.set_create_cbk,
+ dl.wind_create, dl.unwind_create)
+
+# ctypes signatures for the open operation (element 0 of each tuple is
+# the return type).
+class OpOpen:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(fd_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(dict_t))
+# Build the prototypes and type the four open helpers exported by dl.
+_init_op (OpOpen, dl.set_open_fop, dl.set_open_cbk,
+ dl.wind_open, dl.unwind_open)
+
+# ctypes signatures for the readv operation (element 0 of each tuple is
+# the return type).
+class OpReadv:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, c_uint32, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iovec_t), c_int, POINTER(iatt_t),
+ POINTER(iobref_t), POINTER(dict_t))
+# Build the prototypes and type the four readv helpers exported by dl.
+_init_op (OpReadv, dl.set_readv_fop, dl.set_readv_cbk,
+ dl.wind_readv, dl.unwind_readv)
+# ctypes signatures for the writev operation (element 0 of each tuple
+# is the return type).
+class OpWritev:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(iovec_t), c_int, c_long, c_uint32,
+ POINTER(iobref_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+# Build the prototypes and type the four writev helpers exported by dl.
+_init_op (OpWritev, dl.set_writev_fop, dl.set_writev_cbk,
+ dl.wind_writev, dl.unwind_writev)
+
+# ctypes signatures for the opendir operation (element 0 of each tuple
+# is the return type).
+class OpOpendir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(fd_t) ,POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(dict_t))
+# Build the prototypes and type the four opendir helpers exported by dl.
+_init_op (OpOpendir, dl.set_opendir_fop, dl.set_opendir_cbk,
+ dl.wind_opendir, dl.unwind_opendir)
+
+# ctypes type tuples describing the readdir fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for readdir.
+class OpReaddir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
+_init_op (OpReaddir, dl.set_readdir_fop, dl.set_readdir_cbk,
+ dl.wind_readdir, dl.unwind_readdir)
+
+# ctypes type tuples describing the readdirp fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for readdirp.
+class OpReaddirp:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
+_init_op (OpReaddirp, dl.set_readdirp_fop, dl.set_readdirp_cbk,
+ dl.wind_readdirp, dl.unwind_readdirp)
+
+# ctypes type tuples describing the stat fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for stat.
+class OpStat:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpStat, dl.set_stat_fop, dl.set_stat_cbk,
+ dl.wind_stat, dl.unwind_stat)
+
+# ctypes type tuples describing the fstat fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for fstat.
+class OpFstat:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpFstat, dl.set_fstat_fop, dl.set_fstat_cbk,
+ dl.wind_fstat, dl.unwind_fstat)
+
+# ctypes type tuples describing the statfs fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for statfs.
+class OpStatfs:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(statvfs_t), POINTER(dict_t))
+_init_op (OpStatfs, dl.set_statfs_fop, dl.set_statfs_cbk,
+ dl.wind_statfs, dl.unwind_statfs)
+
+
+# ctypes type tuples describing the setxattr fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for setxattr.
+class OpSetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t), c_int32,
+ POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpSetxattr, dl.set_setxattr_fop, dl.set_setxattr_cbk,
+ dl.wind_setxattr, dl.unwind_setxattr)
+
+# ctypes type tuples describing the getxattr fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for getxattr.
+class OpGetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_char_p, POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t), POINTER(dict_t))
+_init_op (OpGetxattr, dl.set_getxattr_fop, dl.set_getxattr_cbk,
+ dl.wind_getxattr, dl.unwind_getxattr)
+
+# ctypes type tuples describing the fsetxattr fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for fsetxattr.
+class OpFsetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(dict_t), c_int32,
+ POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpFsetxattr, dl.set_fsetxattr_fop, dl.set_fsetxattr_cbk,
+ dl.wind_fsetxattr, dl.unwind_fsetxattr)
+
+# ctypes type tuples describing the fgetxattr fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for fgetxattr.
+class OpFgetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_char_p, POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t), POINTER(dict_t))
+_init_op (OpFgetxattr, dl.set_fgetxattr_fop, dl.set_fgetxattr_cbk,
+ dl.wind_fgetxattr, dl.unwind_fgetxattr)
+
+# ctypes type tuples describing the removexattr fop (fop_sig) and its
+# callback (cbk_sig); _init_op receives them along with the dl hooks for
+# removexattr.
+class OpRemovexattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_char_p, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpRemovexattr, dl.set_removexattr_fop, dl.set_removexattr_cbk,
+ dl.wind_removexattr, dl.unwind_removexattr)
+
+
+# ctypes type tuples describing the fremovexattr fop (fop_sig) and its
+# callback (cbk_sig); _init_op receives them along with the dl hooks for
+# fremovexattr.
+class OpFremovexattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_char_p, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpFremovexattr, dl.set_fremovexattr_fop, dl.set_fremovexattr_cbk,
+ dl.wind_fremovexattr, dl.unwind_fremovexattr)
+
+# ctypes type tuples describing the link fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for link.
+class OpLink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpLink, dl.set_link_fop, dl.set_link_cbk,
+ dl.wind_link, dl.unwind_link)
+
+# ctypes type tuples describing the symlink fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for symlink.
+class OpSymlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ c_char_p, POINTER(loc_t), c_uint, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpSymlink, dl.set_symlink_fop, dl.set_symlink_cbk,
+ dl.wind_symlink, dl.unwind_symlink)
+
+# ctypes type tuples describing the unlink fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for unlink.
+class OpUnlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpUnlink, dl.set_unlink_fop, dl.set_unlink_cbk,
+ dl.wind_unlink, dl.unwind_unlink)
+
+# ctypes type tuples describing the readlink fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for readlink.
+class OpReadlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_size_t, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, c_char_p, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpReadlink, dl.set_readlink_fop, dl.set_readlink_cbk,
+ dl.wind_readlink, dl.unwind_readlink)
+
+# ctypes type tuples describing the mkdir fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for mkdir.
+class OpMkdir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_uint, c_uint, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpMkdir, dl.set_mkdir_fop, dl.set_mkdir_cbk,
+ dl.wind_mkdir, dl.unwind_mkdir)
+
+# ctypes type tuples describing the rmdir fop (fop_sig) and its callback
+# (cbk_sig); _init_op receives them along with the dl hooks for rmdir.
+class OpRmdir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpRmdir, dl.set_rmdir_fop, dl.set_rmdir_cbk,
+ dl.wind_rmdir, dl.unwind_rmdir)
+
+
+class Translator:
+ def __init__ (self, c_this):
+ # This is only here to keep references to the stubs we create,
+ # because ctypes doesn't and glupy.so can't because it doesn't
+ # get a pointer to the actual Python object. It's a dictionary
+ # instead of a list in case we ever allow changing fops/cbks
+ # after initialization and need to look them up.
+ self.stub_refs = {}
+ funcs = dir(self.__class__)
+ if "lookup_fop" in funcs:
+ @OpLookup.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.lookup_fop (frame, this, loc, xdata)
+ self.stub_refs["lookup_fop"] = stub
+ dl.set_lookup_fop(c_this,stub)
+ if "lookup_cbk" in funcs:
+ @OpLookup.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, inode,
+ buf, xdata, postparent, s=self):
+ return s.lookup_cbk(frame, cookie, this, op_ret,
+ op_errno, inode, buf, xdata,
+ postparent)
+ self.stub_refs["lookup_cbk"] = stub
+ dl.set_lookup_cbk(c_this,stub)
+ if "create_fop" in funcs:
+ @OpCreate.fop_type
+ def stub (frame, this, loc, flags, mode, umask, fd,
+ xdata, s=self):
+ return s.create_fop (frame, this, loc, flags,
+ mode, umask, fd, xdata)
+ self.stub_refs["create_fop"] = stub
+ dl.set_create_fop(c_this,stub)
+ if "create_cbk" in funcs:
+ @OpCreate.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.create_cbk (frame, cookie, this,
+ op_ret, op_errno, fd,
+ inode, buf, preparent,
+ postparent, xdata)
+ self.stub_refs["create_cbk"] = stub
+ dl.set_create_cbk(c_this,stub)
+ if "open_fop" in funcs:
+ @OpOpen.fop_type
+ def stub (frame, this, loc, flags, fd,
+ xdata, s=self):
+ return s.open_fop (frame, this, loc, flags,
+ fd, xdata)
+ self.stub_refs["open_fop"] = stub
+ dl.set_open_fop(c_this,stub)
+ if "open_cbk" in funcs:
+ @OpOpen.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ xdata, s=self):
+ return s.open_cbk (frame, cookie, this,
+ op_ret, op_errno, fd,
+ xdata)
+ self.stub_refs["open_cbk"] = stub
+ dl.set_open_cbk(c_this,stub)
+ if "readv_fop" in funcs:
+ @OpReadv.fop_type
+ def stub (frame, this, fd, size, offset, flags,
+ xdata, s=self):
+ return s.readv_fop (frame, this, fd, size,
+ offset, flags, xdata)
+ self.stub_refs["readv_fop"] = stub
+ dl.set_readv_fop(c_this,stub)
+ if "readv_cbk" in funcs:
+ @OpReadv.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata,
+ s=self):
+ return s.readv_cbk (frame, cookie, this,
+ op_ret, op_errno, vector,
+ count, stbuf, iobref,
+ xdata)
+ self.stub_refs["readv_cbk"] = stub
+ dl.set_readv_cbk(c_this,stub)
+ if "writev_fop" in funcs:
+ @OpWritev.fop_type
+ def stub (frame, this, fd, vector, count,
+ offset, flags, iobref, xdata, s=self):
+ return s.writev_fop (frame, this, fd, vector,
+ count, offset, flags,
+ iobref, xdata)
+ self.stub_refs["writev_fop"] = stub
+ dl.set_writev_fop(c_this,stub)
+ if "writev_cbk" in funcs:
+ @OpWritev.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata, s=self):
+ return s.writev_cbk (frame, cookie, this,
+ op_ret, op_errno, prebuf,
+ postbuf, xdata)
+ self.stub_refs["writev_cbk"] = stub
+ dl.set_writev_cbk(c_this,stub)
+ if "opendir_fop" in funcs:
+ @OpOpendir.fop_type
+ def stub (frame, this, loc, fd, xdata, s=self):
+ return s.opendir_fop (frame, this, loc, fd,
+ xdata)
+ self.stub_refs["opendir_fop"] = stub
+ dl.set_opendir_fop(c_this,stub)
+ if "opendir_cbk" in funcs:
+ @OpOpendir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ xdata, s=self):
+ return s.opendir_cbk(frame, cookie, this,
+ op_ret, op_errno, fd,
+ xdata)
+ self.stub_refs["opendir_cbk"] = stub
+ dl.set_opendir_cbk(c_this,stub)
+ if "readdir_fop" in funcs:
+ @OpReaddir.fop_type
+ def stub (frame, this, fd, size, offset, xdata, s=self):
+ return s.readdir_fop (frame, this, fd, size,
+ offset, xdata)
+ self.stub_refs["readdir_fop"] = stub
+ dl.set_readdir_fop(c_this,stub)
+ if "readdir_cbk" in funcs:
+ @OpReaddir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ entries, xdata, s=self):
+ return s.readdir_cbk(frame, cookie, this,
+ op_ret, op_errno, entries,
+ xdata)
+ self.stub_refs["readdir_cbk"] = stub
+ dl.set_readdir_cbk(c_this,stub)
+ if "readdirp_fop" in funcs:
+ @OpReaddirp.fop_type
+ def stub (frame, this, fd, size, offset, xdata, s=self):
+ return s.readdirp_fop (frame, this, fd, size,
+ offset, xdata)
+ self.stub_refs["readdirp_fop"] = stub
+ dl.set_readdirp_fop(c_this,stub)
+ if "readdirp_cbk" in funcs:
+ @OpReaddirp.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ entries, xdata, s=self):
+ return s.readdirp_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ entries, xdata)
+ self.stub_refs["readdirp_cbk"] = stub
+ dl.set_readdirp_cbk(c_this,stub)
+ if "stat_fop" in funcs:
+ @OpStat.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.stat_fop (frame, this, loc, xdata)
+ self.stub_refs["stat_fop"] = stub
+ dl.set_stat_fop(c_this,stub)
+ if "stat_cbk" in funcs:
+ @OpStat.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.stat_cbk(frame, cookie, this, op_ret,
+ op_errno, buf, xdata)
+ self.stub_refs["stat_cbk"] = stub
+ dl.set_stat_cbk(c_this,stub)
+ if "fstat_fop" in funcs:
+ @OpFstat.fop_type
+ def stub (frame, this, fd, xdata, s=self):
+ return s.fstat_fop (frame, this, fd, xdata)
+ self.stub_refs["fstat_fop"] = stub
+ dl.set_fstat_fop(c_this,stub)
+ if "fstat_cbk" in funcs:
+ @OpFstat.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.fstat_cbk(frame, cookie, this, op_ret,
+ op_errno, buf, xdata)
+ self.stub_refs["fstat_cbk"] = stub
+ dl.set_fstat_cbk(c_this,stub)
+ if "statfs_fop" in funcs:
+ @OpStatfs.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.statfs_fop (frame, this, loc, xdata)
+ self.stub_refs["statfs_fop"] = stub
+ dl.set_statfs_fop(c_this,stub)
+ if "statfs_cbk" in funcs:
+ @OpStatfs.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.statfs_cbk (frame, cookie, this,
+ op_ret, op_errno, buf,
+ xdata)
+ self.stub_refs["statfs_cbk"] = stub
+ dl.set_statfs_cbk(c_this,stub)
+ if "setxattr_fop" in funcs:
+ @OpSetxattr.fop_type
+ def stub (frame, this, loc, dictionary, flags, xdata,
+ s=self):
+ return s.setxattr_fop (frame, this, loc,
+ dictionary, flags,
+ xdata)
+ self.stub_refs["setxattr_fop"] = stub
+ dl.set_setxattr_fop(c_this,stub)
+ if "setxattr_cbk" in funcs:
+ @OpSetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, xdata,
+ s=self):
+ return s.setxattr_cbk(frame, cookie, this,
+ op_ret, op_errno, xdata)
+ self.stub_refs["setxattr_cbk"] = stub
+ dl.set_setxattr_cbk(c_this,stub)
+ if "getxattr_fop" in funcs:
+ @OpGetxattr.fop_type
+ def stub (frame, this, loc, name, xdata, s=self):
+ return s.getxattr_fop (frame, this, loc, name,
+ xdata)
+ self.stub_refs["getxattr_fop"] = stub
+ dl.set_getxattr_fop(c_this,stub)
+ if "getxattr_cbk" in funcs:
+ @OpGetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata, s=self):
+ return s.getxattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ dictionary, xdata)
+ self.stub_refs["getxattr_cbk"] = stub
+ dl.set_getxattr_cbk(c_this,stub)
+ if "fsetxattr_fop" in funcs:
+ @OpFsetxattr.fop_type
+ def stub (frame, this, fd, dictionary, flags, xdata,
+ s=self):
+ return s.fsetxattr_fop (frame, this, fd,
+ dictionary, flags,
+ xdata)
+ self.stub_refs["fsetxattr_fop"] = stub
+ dl.set_fsetxattr_fop(c_this,stub)
+ if "fsetxattr_cbk" in funcs:
+ @OpFsetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, xdata,
+ s=self):
+ return s.fsetxattr_cbk(frame, cookie, this,
+ op_ret, op_errno, xdata)
+ self.stub_refs["fsetxattr_cbk"] = stub
+ dl.set_fsetxattr_cbk(c_this,stub)
+ if "fgetxattr_fop" in funcs:
+ @OpFgetxattr.fop_type
+ def stub (frame, this, fd, name, xdata, s=self):
+ return s.fgetxattr_fop (frame, this, fd, name,
+ xdata)
+ self.stub_refs["fgetxattr_fop"] = stub
+ dl.set_fgetxattr_fop(c_this,stub)
+ if "fgetxattr_cbk" in funcs:
+ @OpFgetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata, s=self):
+ return s.fgetxattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ dictionary, xdata)
+ self.stub_refs["fgetxattr_cbk"] = stub
+ dl.set_fgetxattr_cbk(c_this,stub)
+ if "removexattr_fop" in funcs:
+ @OpRemovexattr.fop_type
+ def stub (frame, this, loc, name, xdata, s=self):
+ return s.removexattr_fop (frame, this, loc,
+ name, xdata)
+ self.stub_refs["removexattr_fop"] = stub
+ dl.set_removexattr_fop(c_this,stub)
+ if "removexattr_cbk" in funcs:
+ @OpRemovexattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ xdata, s=self):
+ return s.removexattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ xdata)
+ self.stub_refs["removexattr_cbk"] = stub
+ dl.set_removexattr_cbk(c_this,stub)
+ if "fremovexattr_fop" in funcs:
+ @OpFremovexattr.fop_type
+ def stub (frame, this, fd, name, xdata, s=self):
+ return s.fremovexattr_fop (frame, this, fd,
+ name, xdata)
+ self.stub_refs["fremovexattr_fop"] = stub
+ dl.set_fremovexattr_fop(c_this,stub)
+ if "fremovexattr_cbk" in funcs:
+ @OpFremovexattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ xdata, s=self):
+ return s.fremovexattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ xdata)
+ self.stub_refs["fremovexattr_cbk"] = stub
+ dl.set_fremovexattr_cbk(c_this,stub)
+ if "link_fop" in funcs:
+ @OpLink.fop_type
+ def stub (frame, this, oldloc, newloc,
+ xdata, s=self):
+ return s.link_fop (frame, this, oldloc,
+ newloc, xdata)
+ self.stub_refs["link_fop"] = stub
+ dl.set_link_fop(c_this,stub)
+ if "link_cbk" in funcs:
+ @OpLink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.link_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["link_cbk"] = stub
+ dl.set_link_cbk(c_this,stub)
+ if "symlink_fop" in funcs:
+ @OpSymlink.fop_type
+ def stub (frame, this, linkname, loc,
+ umask, xdata, s=self):
+ return s.symlink_fop (frame, this, linkname,
+ loc, umask, xdata)
+ self.stub_refs["symlink_fop"] = stub
+ dl.set_symlink_fop(c_this,stub)
+ if "symlink_cbk" in funcs:
+ @OpSymlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.symlink_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["symlink_cbk"] = stub
+ dl.set_symlink_cbk(c_this,stub)
+ if "unlink_fop" in funcs:
+ @OpUnlink.fop_type
+ def stub (frame, this, loc, xflags,
+ xdata, s=self):
+ return s.unlink_fop (frame, this, loc,
+ xflags, xdata)
+ self.stub_refs["unlink_fop"] = stub
+ dl.set_unlink_fop(c_this,stub)
+ if "unlink_cbk" in funcs:
+ @OpUnlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata, s=self):
+ return s.unlink_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ preparent, postparent,
+ xdata)
+ self.stub_refs["unlink_cbk"] = stub
+ dl.set_unlink_cbk(c_this,stub)
+ if "readlink_fop" in funcs:
+ @OpReadlink.fop_type
+ def stub (frame, this, loc, size,
+ xdata, s=self):
+ return s.readlink_fop (frame, this, loc,
+ size, xdata)
+ self.stub_refs["readlink_fop"] = stub
+ dl.set_readlink_fop(c_this,stub)
+ if "readlink_cbk" in funcs:
+ @OpReadlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ path, buf, xdata, s=self):
+ return s.readlink_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ path, buf, xdata)
+ self.stub_refs["readlink_cbk"] = stub
+ dl.set_readlink_cbk(c_this,stub)
+ if "mkdir_fop" in funcs:
+ @OpMkdir.fop_type
+ def stub (frame, this, loc, mode, umask, xdata,
+ s=self):
+ return s.mkdir_fop (frame, this, loc, mode,
+ umask, xdata)
+ self.stub_refs["mkdir_fop"] = stub
+ dl.set_mkdir_fop(c_this,stub)
+ if "mkdir_cbk" in funcs:
+ @OpMkdir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata, s=self):
+ return s.mkdir_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["mkdir_cbk"] = stub
+ dl.set_mkdir_cbk(c_this,stub)
+ if "rmdir_fop" in funcs:
+ @OpRmdir.fop_type
+ def stub (frame, this, loc, xflags,
+ xdata, s=self):
+ return s.rmdir_fop (frame, this, loc,
+ xflags, xdata)
+ self.stub_refs["rmdir_fop"] = stub
+ dl.set_rmdir_fop(c_this,stub)
+ if "rmdir_cbk" in funcs:
+ @OpRmdir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata, s=self):
+ return s.rmdir_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ preparent, postparent,
+ xdata)
+ self.stub_refs["rmdir_cbk"] = stub
+ dl.set_rmdir_cbk(c_this,stub)
diff --git a/xlators/features/glupy/src/helloworld.py b/xlators/features/glupy/src/helloworld.py
new file mode 100644
index 000000000..8fe403711
--- /dev/null
+++ b/xlators/features/glupy/src/helloworld.py
@@ -0,0 +1,19 @@
+import sys
+from gluster import *
+
+class xlator (Translator):
+
+ def __init__(self, c_this):
+ Translator.__init__(self, c_this)
+
+ def lookup_fop(self, frame, this, loc, xdata):
+ print "Python xlator: Hello!"
+ dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent):
+ print "Python xlator: Hello again!"
+ dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent)
+ return 0
diff --git a/xlators/features/glupy/src/negative.py b/xlators/features/glupy/src/negative.py
new file mode 100644
index 000000000..1023602b9
--- /dev/null
+++ b/xlators/features/glupy/src/negative.py
@@ -0,0 +1,92 @@
+import sys
+from uuid import UUID
+from gluster import *
+
+# Negative-lookup-caching example. If a file wasn't there the last time we
+# looked, it's probably still not there. This translator keeps track of
+# those failed lookups for us, and returns ENOENT without needing to pass the
+# call any further for repeated requests.
+
+# If we were doing this for real, we'd need separate caches for each xlator
+# instance. The easiest way to do this would be to have xlator.__init__
+# "register" each instance in a module-global dict, with the key as the C
+# translator address and the value as the xlator object itself. For testing
+# and teaching, it's sufficient just to have one cache. The keys are parent
+# GFIDs, and the entries are lists of names within that parent that we know
+# don't exist.
+cache = {}
+
+# TBD: we need a better way of handling per-request data (frame->local in C).
+dl.get_id.restype = c_long
+dl.get_id.argtypes = [ POINTER(call_frame_t) ]
+
+def uuid2str (gfid):
+ return str(UUID(''.join(map("{0:02x}".format, gfid))))
+
+class xlator (Translator):
+
+ def __init__ (self, c_this):
+ self.requests = {}
+ Translator.__init__(self,c_this)
+
+ def lookup_fop (self, frame, this, loc, xdata):
+ pargfid = uuid2str(loc.contents.pargfid)
+ print "lookup FOP: %s:%s" % (pargfid, loc.contents.name)
+ # Check the cache.
+ if cache.has_key(pargfid):
+ if loc.contents.name in cache[pargfid]:
+ print "short-circuiting for %s:%s" % (pargfid,
+ loc.contents.name)
+ dl.unwind_lookup(frame,0,this,-1,2,None,None,None,None)
+ return 0
+ key = dl.get_id(frame)
+ self.requests[key] = (pargfid, loc.contents.name[:])
+ # TBD: get real child xl from init, pass it here
+ dl.wind_lookup(frame,POINTER(xlator_t)(),loc,xdata)
+ return 0
+
+ def lookup_cbk (self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent):
+ print "lookup CBK: %d (%d)" % (op_ret, op_errno)
+ key = dl.get_id(frame)
+ pargfid, name = self.requests[key]
+ # Update the cache.
+ if op_ret == 0:
+ print "found %s, removing from cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].discard(name)
+ elif op_errno == 2: # ENOENT
+ print "failed to find %s, adding to cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].add(name)
+ else:
+ cache[pargfid] = set([name])
+ del self.requests[key]
+ dl.unwind_lookup(frame,cookie,this,op_ret,op_errno,
+ inode,buf,xdata,postparent)
+ return 0
+
+ def create_fop (self, frame, this, loc, flags, mode, umask, fd, xdata):
+ pargfid = uuid2str(loc.contents.pargfid)
+ print "create FOP: %s:%s" % (pargfid, loc.contents.name)
+ key = dl.get_id(frame)
+ self.requests[key] = (pargfid, loc.contents.name[:])
+ # TBD: get real child xl from init, pass it here
+ dl.wind_create(frame,POINTER(xlator_t)(),loc,flags,mode,umask,fd,xdata)
+ return 0
+
+ def create_cbk (self, frame, cookie, this, op_ret, op_errno, fd, inode,
+ buf, preparent, postparent, xdata):
+ print "create CBK: %d (%d)" % (op_ret, op_errno)
+ key = dl.get_id(frame)
+ pargfid, name = self.requests[key]
+ # Update the cache.
+ if op_ret == 0:
+ print "created %s, removing from cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].discard(name)
+ del self.requests[key]
+ dl.unwind_create(frame,cookie,this,op_ret,op_errno,fd,inode,buf,
+ preparent,postparent,xdata)
+ return 0
+
diff --git a/xlators/features/index/Makefile.am b/xlators/features/index/Makefile.am
new file mode 100644
index 000000000..a985f42a8
--- /dev/null
+++ b/xlators/features/index/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/index/src/Makefile.am b/xlators/features/index/src/Makefile.am
new file mode 100644
index 000000000..73bb8972e
--- /dev/null
+++ b/xlators/features/index/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = index.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+index_la_LDFLAGS = -module -avoid-version
+
+index_la_SOURCES = index.c
+index_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = index.h index-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h
new file mode 100644
index 000000000..553d492df
--- /dev/null
+++ b/xlators/features/index/src/index-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INDEX_MEM_TYPES_H__
+#define __INDEX_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/* Allocation type ids used by the index translator; they continue the
+ * numbering after gf_common_mt_end.  The guard macro is specific to this
+ * header: it previously reused the quiesce translator's guard name, which
+ * would have hidden this enum from any unit that had already seen a header
+ * using that guard. */
+enum gf_index_mem_types_ {
+        gf_index_mt_priv_t   = gf_common_mt_end + 1,
+        gf_index_inode_ctx_t = gf_common_mt_end + 2,
+        gf_index_fd_ctx_t    = gf_common_mt_end + 3,
+        gf_index_mt_end
+};
+#endif /* __INDEX_MEM_TYPES_H__ */
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
new file mode 100644
index 000000000..9253120f3
--- /dev/null
+++ b/xlators/features/index/src/index.c
@@ -0,0 +1,1489 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "index.h"
+#include "options.h"
+#include "glusterfs3-xdr.h"
+#include "syncop.h"
+
+#define XATTROP_SUBDIR "xattrop"
+#define BASE_INDICES_HOLDER_SUBDIR "base_indices_holder"
+
+/* Detach and return the first stub on callstubs, or NULL when the list is
+ * empty. */
+call_stub_t *
+__index_dequeue (struct list_head *callstubs)
+{
+        call_stub_t *first = NULL;
+
+        if (list_empty (callstubs))
+                return NULL;
+
+        first = list_entry (callstubs->next, call_stub_t, list);
+        list_del_init (&first->list);
+
+        return first;
+}
+
+/* Append stub to the tail of the callstubs list.  The caller in this file
+ * (worker_enqueue) invokes it with priv->mutex held. */
+inline static void
+__index_enqueue (struct list_head *callstubs, call_stub_t *stub)
+{
+ list_add_tail (&stub->list, callstubs);
+}
+
+/* Queue stub on this translator's private callstubs list and signal
+ * priv->cond, all under priv->mutex. */
+static void
+worker_enqueue (xlator_t *this, call_stub_t *stub)
+{
+        index_priv_t *priv = this->private;
+
+        pthread_mutex_lock (&priv->mutex);
+        __index_enqueue (&priv->callstubs, stub);
+        pthread_cond_signal (&priv->cond);
+        pthread_mutex_unlock (&priv->mutex);
+}
+
+/* Worker thread body.  data is the xlator_t this worker serves; it is also
+ * installed as THIS.  The thread loops forever: it waits on priv->cond
+ * until priv->callstubs is non-empty, dequeues one stub under priv->mutex,
+ * and resumes it with the mutex released.  The trailing return is never
+ * reached. */
+void *
+index_worker (void *data)
+{
+        index_priv_t *priv = NULL;
+        xlator_t     *this = NULL;
+        call_stub_t  *stub = NULL;
+
+        THIS = data;
+        this = data;
+        priv = this->private;
+
+        for (;;) {
+                pthread_mutex_lock (&priv->mutex);
+                {
+                        /* Re-check the list after every wakeup; the result
+                         * of pthread_cond_wait itself is not needed. */
+                        while (list_empty (&priv->callstubs)) {
+                                pthread_cond_wait (&priv->cond,
+                                                   &priv->mutex);
+                        }
+
+                        stub = __index_dequeue (&priv->callstubs);
+                }
+                pthread_mutex_unlock (&priv->mutex);
+
+                if (stub) /* guard against spurious wakeups */
+                        call_resume (stub);
+        }
+
+        return NULL;
+}
+/* Fetch the index context stored on inode for this translator, creating
+ * and storing a zeroed one (with an initialised callstubs list) when none
+ * is present.  *ctx is written only when a non-NULL context is available.
+ * Returns 0 on success, -1 if allocation fails, or the result of
+ * __inode_ctx_put if storing the new context fails. */
+int
+__index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+{
+        index_inode_ctx_t *ictx   = NULL;
+        uint64_t           stored = 0;
+        int                ret    = 0;
+
+        ret = __inode_ctx_get (inode, this, &stored);
+        if (ret == 0) {
+                ictx = (index_inode_ctx_t*) (long) stored;
+        } else {
+                ictx = GF_CALLOC (1, sizeof (*ictx), gf_index_inode_ctx_t);
+                if (ictx == NULL) {
+                        ret = -1;
+                } else {
+                        INIT_LIST_HEAD (&ictx->callstubs);
+                        ret = __inode_ctx_put (inode, this, (uint64_t)ictx);
+                        if (ret != 0) {
+                                GF_FREE (ictx);
+                                ictx = NULL;
+                        }
+                }
+        }
+
+        if (ictx != NULL)
+                *ctx = ictx;
+        return ret;
+}
+
+/* Locked wrapper: runs __index_inode_ctx_get with inode->lock held and
+ * returns its result. */
+int
+index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+{
+ int ret = 0;
+
+ LOCK (&inode->lock);
+ {
+ ret = __index_inode_ctx_get (inode, this, ctx);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+/* Write "<base>/<subdir>" into index_dir, which holds len bytes.  The
+ * snprintf result is not checked, so an over-long path is truncated
+ * silently. */
+static void
+make_index_dir_path (char *base, const char *subdir,
+ char *index_dir, size_t len)
+{
+ snprintf (index_dir, len, "%s/%s", base, subdir);
+}
+
+/* Make sure <priv->index_basepath>/<subdir> exists as a directory, creating
+ * each path component in turn.
+ *
+ * Returns 0 when the directory already exists or the walk completes,
+ * -1 when a mkdir fails with anything other than EEXIST (logged with
+ * strerror(errno)), and -2 when the path exists but is not a directory
+ * (also logged). */
+int
+index_dir_create (xlator_t *this, const char *subdir)
+{
+ int ret = 0;
+ struct stat st = {0};
+ char fullpath[PATH_MAX] = {0};
+ char path[PATH_MAX] = {0};
+ char *dir = NULL;
+ index_priv_t *priv = NULL;
+ size_t len = 0;
+ size_t pathlen = 0;
+
+ priv = this->private;
+ make_index_dir_path (priv->index_basepath, subdir, fullpath,
+ sizeof (fullpath));
+ /* Fast path: the target is already there. */
+ ret = stat (fullpath, &st);
+ if (!ret) {
+ if (!S_ISDIR (st.st_mode))
+ ret = -2;
+ goto out;
+ }
+
+ /* Drop one trailing '/'.  NOTE(review): pathlen is not decremented
+ * afterwards, so the lengths computed below are relative to the
+ * untrimmed string - confirm this is intended. */
+ pathlen = strlen (fullpath);
+ if ((pathlen > 1) && fullpath[pathlen - 1] == '/')
+ fullpath[pathlen - 1] = '\0';
+ /* Walk the '/' separators: each pass copies the prefix of fullpath that
+ * ends before the next separator (or the whole path on the last pass)
+ * into path and tries to create it.  If fullpath contains no '/' the
+ * loop is skipped and 0 is returned without creating anything. */
+ dir = strchr (fullpath, '/');
+ while (dir) {
+ dir = strchr (dir + 1, '/');
+ if (dir)
+ len = pathlen - strlen (dir);
+ else
+ len = pathlen;
+ strncpy (path, fullpath, len);
+ path[len] = '\0';
+ /* NOTE(review): mode 0600 carries no search (x) bit for a directory -
+ * confirm that is intended. */
+ ret = mkdir (path, 0600);
+ if (ret && (errno != EEXIST))
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to "
+ "create (%s)", priv->index_basepath, subdir,
+ strerror (errno));
+ } else if (ret == -2) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to create, "
+ "path exists, not a directory ", priv->index_basepath,
+ subdir);
+ }
+ return ret;
+}
+
+/* Copy the current priv->index into the caller's index while holding
+ * priv->lock. */
+void
+index_get_index (index_priv_t *priv, uuid_t index)
+{
+ LOCK (&priv->lock);
+ {
+ uuid_copy (index, priv->index);
+ }
+ UNLOCK (&priv->lock);
+}
+
+/* Under priv->lock: generate a fresh priv->index only if it still equals
+ * the value the caller passed in, then copy the current priv->index back
+ * into index. */
+void
+index_generate_index (index_priv_t *priv, uuid_t index)
+{
+        LOCK (&priv->lock);
+        {
+                /* To prevent duplicate generates.
+                 * This method fails if number of contending threads is
+                 * greater than MAX_LINK count of the fs */
+                if (uuid_compare (priv->index, index) == 0)
+                        uuid_generate (priv->index);
+                uuid_copy (index, priv->index);
+        }
+        UNLOCK (&priv->lock);
+}
+
+/* Build "<base>/<subdir>/<subdir>-<index as text>" in index_path, which
+ * holds len bytes. */
+static void
+make_index_path (char *base, const char *subdir, uuid_t index,
+                 char *index_path, size_t len)
+{
+        size_t used = 0;
+
+        make_index_dir_path (base, subdir, index_path, len);
+        /* Continue writing right after the directory part. */
+        used = strlen (index_path);
+        snprintf (index_path + used, len - used,
+                  "/%s-%s", subdir, uuid_utoa (index));
+}
+
+/* Build "<base>/<subdir>/<gfid as text>" in gfid_path, which holds len
+ * bytes. */
+static void
+make_gfid_path (char *base, const char *subdir, uuid_t gfid,
+                char *gfid_path, size_t len)
+{
+        size_t used = 0;
+
+        make_index_dir_path (base, subdir, gfid_path, len);
+        /* Continue writing right after the directory part. */
+        used = strlen (gfid_path);
+        snprintf (gfid_path + used, len - used,
+                  "/%s", uuid_utoa (gfid));
+}
+
+/* Build "<base>/<subdir>/<filename>" in file_path, which holds len bytes. */
+static void
+make_file_path (char *base, const char *subdir, const char *filename,
+                char *file_path, size_t len)
+{
+        size_t used = 0;
+
+        make_index_dir_path (base, subdir, file_path, len);
+        /* Continue writing right after the directory part. */
+        used = strlen (file_path);
+        snprintf (file_path + used, len - used,
+                  "/%s", filename);
+}
+
+/* Remove a stale index file.  Does nothing unless
+ * priv->to_be_healed_states equals synced_state.  Otherwise it builds the
+ * paths of filename under the xattrop and base_indices_holder
+ * sub-directories; if the base_indices_holder copy cannot be stat'ed an
+ * error is logged and nothing is removed.  When the xattrop file exists
+ * with a link count of exactly 2, both paths are unlinked (results of the
+ * unlink calls are not checked). */
+static void
+check_delete_stale_index_file (xlator_t *this, char *filename)
+{
+        int             ret = 0;
+        struct stat     st = {0};
+        struct stat     base_index_st = {0};
+        char            filepath[PATH_MAX] = {0};
+        char            filepath_under_base_indices_holder[PATH_MAX] = {0};
+        index_priv_t    *priv = NULL;
+
+        priv = this->private;
+        if (priv->to_be_healed_states != synced_state)
+                return;
+
+        make_file_path (priv->index_basepath, XATTROP_SUBDIR,
+                        filename, filepath, sizeof (filepath));
+
+        make_file_path (priv->index_basepath, BASE_INDICES_HOLDER_SUBDIR,
+                        filename, filepath_under_base_indices_holder,
+                        sizeof (filepath_under_base_indices_holder));
+
+
+        ret = stat (filepath_under_base_indices_holder, &base_index_st);
+        if (ret) {
+                /* The two literals are concatenated, so the separating
+                 * space has to live inside one of them. */
+                gf_log (THIS->name, GF_LOG_ERROR, "Base index is not created "
+                        "under index/base_indices_holder");
+                return;
+        }
+
+        ret = stat (filepath, &st);
+        if (!ret && st.st_nlink == 2) {
+                unlink (filepath);
+                unlink (filepath_under_base_indices_holder);
+        }
+}
+
+/* Read directory entries from dir, starting at offset off (0 rewinds the
+ * stream), and append one gf_dirent_t per entry to entries->list until the
+ * next entry would push the accumulated size past size.  Returns the number
+ * of entries appended.
+ *
+ * When type is INDEX_XATTROP, names beginning with XATTROP_SUBDIR"-" are
+ * not returned; they are handed to check_delete_stale_index_file instead.
+ * After the loop errno is set to ENOENT if one more readdir() returns NULL
+ * with errno 0, which the code uses to indicate EOF.  The fd parameter is
+ * not used in this body. */
+static int
+index_fill_readdir (fd_t *fd, DIR *dir, off_t off,
+ size_t size, gf_dirent_t *entries, readdir_directory type)
+{
+ off_t in_case = -1;
+ size_t filled = 0;
+ int count = 0;
+ char entrybuf[sizeof(struct dirent) + 256 + 8];
+ struct dirent *entry = NULL;
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (!off) {
+ rewinddir (dir);
+ } else {
+ seekdir (dir, off);
+ }
+
+ while (filled <= size) {
+ /* Remember where this entry starts so the stream can be put back if
+ * the entry turns out not to fit. */
+ in_case = telldir (dir);
+
+ if (in_case == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "telldir failed on dir=%p: %s",
+ dir, strerror (errno));
+ goto out;
+ }
+
+ errno = 0;
+ entry = NULL;
+ /* NOTE(review): the return value of readdir_r is discarded and errno is
+ * inspected instead; POSIX reports readdir_r failures through the
+ * return value - confirm errno is reliable on the target libc. */
+ readdir_r (dir, (struct dirent *)entrybuf, &entry);
+
+ if (!entry) {
+ if (errno == EBADF) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "readdir failed on dir=%p: %s",
+ dir, strerror (errno));
+ goto out;
+ }
+ break;
+ }
+
+ /* Internal "xattrop-" entries are filtered out of xattrop listings and
+ * checked for staleness instead. */
+ if (!strncmp (entry->d_name, XATTROP_SUBDIR"-",
+ strlen (XATTROP_SUBDIR"-")) &&
+ (type == INDEX_XATTROP)) {
+ check_delete_stale_index_file (this, entry->d_name);
+ continue;
+ }
+
+ /* Size charged for this entry: the larger of the two record structs
+ * plus the name and its terminator. */
+ this_size = max (sizeof (gf_dirent_t),
+ sizeof (gfs3_dirplist))
+ + strlen (entry->d_name) + 1;
+
+ if (this_size + filled > size) {
+ /* Does not fit: rewind to the start of this entry and stop. */
+ seekdir (dir, in_case);
+ break;
+ }
+
+ this_entry = gf_dirent_for_name (entry->d_name);
+
+ if (!this_entry) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "could not create gf_dirent for entry %s: (%s)",
+ entry->d_name, strerror (errno));
+ goto out;
+ }
+ /* d_off records the stream position after this entry. */
+ this_entry->d_off = telldir (dir);
+ this_entry->d_ino = entry->d_ino;
+
+ list_add_tail (&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count ++;
+ }
+
+ /* NOTE(review): this probe advances the stream by one entry when it is
+ * not at EOF; presumably callers re-seek using d_off - confirm. */
+ if ((!readdir (dir) && (errno == 0)))
+ /* Indicate EOF */
+ errno = ENOENT;
+out:
+ return count;
+}
+
+/*
+ * Synctask body: hard-link every base index file found in the
+ * XATTROP_SUBDIR directory (names starting with XATTROP_SUBDIR"-") into the
+ * BASE_INDICES_HOLDER_SUBDIR directory under the same name.
+ *
+ * index_priv is the index_priv_t of the translator.  The state is moved to
+ * sync_started once both directories have been opened.
+ *
+ * Returns 0 on success and -1 on failure (directory cannot be opened, link()
+ * fails with anything but EEXIST, or closedir() fails).  Both directory
+ * streams are closed on every path.
+ */
+int
+sync_base_indices (void *index_priv)
+{
+        index_priv_t  *priv = NULL;
+        DIR           *dir_base_holder = NULL;
+        DIR           *xattrop_dir = NULL;
+        struct dirent *entry = NULL;
+        char           base_indices_holder[PATH_MAX] = {0};
+        char           xattrop_directory[PATH_MAX] = {0};
+        char           base_index_path[PATH_MAX] = {0};
+        char           xattrop_index_path[PATH_MAX] = {0};
+        int            ret = -1;
+
+        priv = index_priv;
+
+        snprintf (base_indices_holder, PATH_MAX, "%s/%s", priv->index_basepath,
+                  BASE_INDICES_HOLDER_SUBDIR);
+        snprintf (xattrop_directory, PATH_MAX, "%s/%s", priv->index_basepath,
+                  XATTROP_SUBDIR);
+
+        /* opened only to make sure the holder directory exists */
+        dir_base_holder = opendir (base_indices_holder);
+        if (dir_base_holder == NULL)
+                goto out;
+
+        xattrop_dir = opendir (xattrop_directory);
+        if (xattrop_dir == NULL)
+                goto out;
+
+        priv->to_be_healed_states = sync_started;
+        while ((entry = readdir (xattrop_dir)) != NULL) {
+                if (!strcmp (entry->d_name, ".") ||
+                    !strcmp (entry->d_name, ".."))
+                        continue;
+
+                /* only base index files are mirrored, gfid entries are not */
+                if (strncmp (entry->d_name, XATTROP_SUBDIR"-",
+                             strlen (XATTROP_SUBDIR"-")))
+                        continue;
+
+                snprintf (xattrop_index_path, PATH_MAX, "%s/%s",
+                          xattrop_directory, entry->d_name);
+                snprintf (base_index_path, PATH_MAX, "%s/%s",
+                          base_indices_holder, entry->d_name);
+
+                ret = link (xattrop_index_path, base_index_path);
+                if (ret && errno != EEXIST)
+                        goto out;
+        }
+
+        ret = 0;
+out:
+        /* release both streams on every path; a closedir() failure only
+         * turns an otherwise successful run into a failure */
+        if (xattrop_dir && closedir (xattrop_dir) && !ret)
+                ret = -1;
+        if (dir_base_holder && closedir (dir_base_holder) && !ret)
+                ret = -1;
+        return ret;
+}
+
+/*
+ * Completion callback of the base-indices sync task.
+ *
+ * ret is the result of sync_base_indices(); data is the index_priv_t.  On
+ * failure the state goes back to sync_not_started so that a later index_add()
+ * can retry, on success it becomes synced_state.  The frame created for the
+ * task is always destroyed here, even when data is NULL.
+ *
+ * Always returns 0.
+ */
+int
+base_indices_syncing_done (int ret, call_frame_t *frame, void *data)
+{
+        index_priv_t *priv = data;
+
+        if (priv) {
+                if (ret)
+                        priv->to_be_healed_states = sync_not_started;
+                else
+                        priv->to_be_healed_states = synced_state;
+        }
+
+        if (frame)
+                STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+/*
+ * Prepare the BASE_INDICES_HOLDER_SUBDIR directory and start a synctask
+ * (sync_base_indices / base_indices_syncing_done) that populates it.
+ *
+ * - state is not sync_not_started: nothing is done, -1 is returned.
+ * - directory missing: it is created through index_dir_create().
+ * - directory present: every entry in it is unlinked first.
+ *
+ * Returns 0 when the task was queued, non-zero otherwise.
+ */
+int
+sync_base_indices_from_xattrop (xlator_t *this)
+{
+        index_priv_t  *priv = NULL;
+        char           base_indices_holder[PATH_MAX] = {0};
+        char           entry_path[PATH_MAX] = {0};
+        int            ret = 0;
+        struct stat    st = {0};
+        DIR           *dir = NULL;
+        struct dirent *entry = NULL;
+        call_frame_t  *frame = NULL;
+
+        priv = this->private;
+
+        if (priv->to_be_healed_states != sync_not_started) {
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (base_indices_holder, PATH_MAX, "%s/%s", priv->index_basepath,
+                  BASE_INDICES_HOLDER_SUBDIR);
+
+        ret = stat (base_indices_holder, &st);
+        if (ret) {
+                /* errno is only meaningful when stat() actually failed */
+                if (errno != ENOENT)
+                        goto out;
+                ret = index_dir_create (this, BASE_INDICES_HOLDER_SUBDIR);
+                if (ret)
+                        goto out;
+        } else {
+                dir = opendir (base_indices_holder);
+                if (dir == NULL) {
+                        ret = -1;
+                        goto out;
+                }
+                while ((entry = readdir (dir)) != NULL) {
+                        if (!strcmp (entry->d_name, ".") ||
+                            !strcmp (entry->d_name, ".."))
+                                continue;
+                        /* d_name is relative to the holder directory, not
+                         * to the process working directory */
+                        snprintf (entry_path, sizeof (entry_path), "%s/%s",
+                                  base_indices_holder, entry->d_name);
+                        ret = unlink (entry_path);
+                        if (ret)
+                                goto out;
+                }
+        }
+
+        /* At this point the index/base_indices_holder directory exists and
+         * has no entries */
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame) {
+                ret = -1;
+                goto out;
+        }
+        set_lk_owner_from_ptr (&frame->root->lk_owner, frame->root);
+
+        frame->root->pid = LOW_PRIO_PROC_PID;
+
+        ret = synctask_new (this->ctx->env, sync_base_indices,
+                            base_indices_syncing_done, frame, priv);
+        if (ret) {
+                /* the completion callback, which normally destroys the
+                 * frame, will never run */
+                STACK_DESTROY (frame->root);
+        }
+
+out:
+        if (dir)
+                closedir (dir);
+        return ret;
+}
+
+/*
+ * Add gfid to the index kept in subdir by hard-linking the current base
+ * index file to <subdir>/<gfid>.
+ *
+ * If the link fails with ENOENT the index directory is created, if it fails
+ * with EMLINK a new base index is generated; in both cases the base index
+ * file is (re)created and the link is retried.  When the base-indices sync
+ * has already started, a newly created base index file is also linked into
+ * the BASE_INDICES_HOLDER_SUBDIR directory under the same "<subdir>-<uuid>"
+ * name that sync_base_indices() uses.
+ *
+ * On success, if the sync has not started yet, it is kicked off and its
+ * result becomes the return value.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+index_add (xlator_t *this, uuid_t gfid, const char *subdir)
+{
+        int32_t       op_errno = 0;
+        char          gfid_path[PATH_MAX] = {0};
+        char          index_path[PATH_MAX] = {0};
+        char          base_path[PATH_MAX] = {0};
+        size_t        used = 0;
+        int           ret = 0;
+        uuid_t        index = {0};
+        index_priv_t *priv = NULL;
+        struct stat   st = {0};
+        int           fd = -1;
+
+        priv = this->private;
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid),
+                                       out, op_errno, EINVAL);
+
+        make_gfid_path (priv->index_basepath, subdir, gfid,
+                        gfid_path, sizeof (gfid_path));
+
+        /* already indexed */
+        ret = stat (gfid_path, &st);
+        if (!ret)
+                goto out;
+
+        index_get_index (priv, index);
+        make_index_path (priv->index_basepath, subdir,
+                         index, index_path, sizeof (index_path));
+        ret = link (index_path, gfid_path);
+        if (!ret || (errno == EEXIST)) {
+                ret = 0;
+                goto out;
+        }
+
+        op_errno = errno;
+        if (op_errno == ENOENT) {
+                ret = index_dir_create (this, subdir);
+                if (ret)
+                        goto out;
+        } else if (op_errno == EMLINK) {
+                /* link count of the base file is exhausted: switch to a
+                 * fresh base index */
+                index_generate_index (priv, index);
+                make_index_path (priv->index_basepath, subdir,
+                                 index, index_path, sizeof (index_path));
+        } else {
+                goto out;
+        }
+
+        fd = creat (index_path, 0);
+        if ((fd < 0) && (errno != EEXIST)) {
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
+                        "create index (%s)", uuid_utoa (gfid),
+                        strerror (errno));
+                goto out;
+        }
+
+        if (fd >= 0)
+                close (fd);
+
+        ret = link (index_path, gfid_path);
+        if (ret && (errno != EEXIST)) {
+                gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
+                        "add to index (%s)", uuid_utoa (gfid),
+                        strerror (errno));
+                goto out;
+        }
+
+        if (priv->to_be_healed_states != sync_not_started) {
+                /* holder entry: <base>/BASE_INDICES_HOLDER_SUBDIR/
+                 * <subdir>-<uuid>, matching sync_base_indices() and
+                 * check_delete_stale_index_file() */
+                make_index_dir_path (priv->index_basepath,
+                                     BASE_INDICES_HOLDER_SUBDIR,
+                                     base_path, sizeof (base_path));
+                used = strlen (base_path);
+                snprintf (base_path + used, sizeof (base_path) - used,
+                          "/%s-%s", subdir, uuid_utoa (index));
+                ret = link (index_path, base_path);
+                if (ret && (errno != EEXIST))
+                        goto out;
+        }
+
+        ret = 0;
+out:
+        /* If base_indices_holder is not created: create and sync.
+         * If the directory is present: delete contents and start syncing.
+         * If syncing is in progress or done: nothing to do. */
+        if (!ret && (priv->to_be_healed_states == sync_not_started))
+                ret = sync_base_indices_from_xattrop (this);
+
+        return ret;
+}
+
+/*
+ * Remove the index entry of gfid from subdir by unlinking <subdir>/<gfid>.
+ *
+ * A missing entry (ENOENT) is treated as success.  Returns 0 on success or
+ * the negated errno of the failed unlink().
+ * NOTE(review): when gfid is null the assertion macro jumps to out with
+ * ret still 0 -- confirm that reporting success there is intended.
+ */
+int
+index_del (xlator_t *this, uuid_t gfid, const char *subdir)
+{
+        int32_t       op_errno = 0;
+        index_priv_t *priv = NULL;
+        int           ret = 0;
+        char          gfid_path[PATH_MAX] = {0};
+
+        priv = this->private;
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid),
+                                       out, op_errno, EINVAL);
+        make_gfid_path (priv->index_basepath, subdir, gfid,
+                        gfid_path, sizeof (gfid_path));
+        ret = unlink (gfid_path);
+        if (ret && (errno != ENOENT)) {
+                /* capture errno before logging can overwrite it */
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: failed to delete from index (%s)",
+                        gfid_path, strerror (op_errno));
+                ret = -op_errno;
+                goto out;
+        }
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * dict_foreach() callback: returns -1 when mem_0filled() reports non-zero
+ * for the value of the key, 0 otherwise.
+ * -1 means no more iterations, i.e. it is treated as 'break'.
+ * NOTE(review): mem_0filled() presumably returns non-zero when some byte of
+ * the buffer is not zero -- confirm against its definition.
+ */
+static int
+_check_key_is_zero_filled (dict_t *d, char *k, data_t *v,
+                           void *tmp)
+{
+        return mem_0filled ((const char*)v->data, v->len) ? -1 : 0;
+}
+
+
+/*
+ * Bring the index in line with the result of an xattrop.
+ *
+ * xattr is the dictionary returned by the xattrop.  If the dict_foreach()
+ * scan with _check_key_is_zero_filled is not cut short, the inode's gfid is
+ * removed from XATTROP_SUBDIR; otherwise it is added.  The per-inode state
+ * (IN / NOTIN) is consulted first to skip redundant work and is updated
+ * only when the index operation succeeds.
+ */
+void
+_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+        gf_boolean_t       zero_xattr = _gf_true;
+        index_inode_ctx_t *ctx = NULL;
+        int                ret = 0;
+
+        ret = dict_foreach (xattr, _check_key_is_zero_filled, NULL);
+        if (ret == -1)
+                zero_xattr = _gf_false;
+
+        ret = index_inode_ctx_get (inode, this, &ctx);
+        if (ret) {
+                /* zero xattrs lead to a delete, anything else to an add */
+                gf_log (this->name, GF_LOG_ERROR, "Not able to %s %s -> index",
+                        zero_xattr?"del":"add", uuid_utoa (inode->gfid));
+                goto out;
+        }
+        if (zero_xattr) {
+                if (ctx->state == NOTIN)
+                        goto out;
+                ret = index_del (this, inode->gfid, XATTROP_SUBDIR);
+                if (!ret)
+                        ctx->state = NOTIN;
+        } else {
+                if (ctx->state == IN)
+                        goto out;
+                ret = index_add (this, inode->gfid, XATTROP_SUBDIR);
+                if (!ret)
+                        ctx->state = IN;
+        }
+out:
+        return;
+}
+
+/* Index update for a completed xattrop: forwards to _xattrop_index_action(). */
+void
+fop_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+        _xattrop_index_action (this, inode, xattr);
+        return;
+}
+
+/* Index update for a completed fxattrop: forwards to _xattrop_index_action(). */
+void
+fop_fxattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+        _xattrop_index_action (this, inode, xattr);
+        return;
+}
+
+/*
+ * Tell whether an xattrop has to go through the index queue: only
+ * GF_XATTROP_ADD_ARRAY operations are tracked.  loc and dict are unused.
+ */
+inline gf_boolean_t
+index_xattrop_track (loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+{
+        gf_boolean_t tracked = (flags == GF_XATTROP_ADD_ARRAY);
+
+        return tracked;
+}
+
+/*
+ * Tell whether an fxattrop has to go through the index queue: only
+ * GF_XATTROP_ADD_ARRAY operations are tracked.  fd and dict are unused.
+ */
+inline gf_boolean_t
+index_fxattrop_track (fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+{
+        gf_boolean_t tracked = (flags == GF_XATTROP_ADD_ARRAY);
+
+        return tracked;
+}
+
+/*
+ * Fetch, or create on first use, the index_fd_ctx_t stored in fd for this
+ * translator.  Only fds opened on the virtual xattrop directory are
+ * accepted.  A new context holds an open DIR stream on the XATTROP_SUBDIR
+ * directory.
+ *
+ * The double underscore follows the convention used in this file for the
+ * variant wrapped by index_fd_ctx_get() under fd->lock.
+ *
+ * Returns 0 and stores the context in *ctx on success; returns -EINVAL,
+ * -ENOMEM or the negated errno of opendir() on failure, leaving *ctx
+ * untouched.
+ */
+int
+__index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+        int             ret = 0;
+        index_fd_ctx_t *fctx = NULL;
+        uint64_t        tmpctx = 0;
+        char            index_dir[PATH_MAX] = {0};
+        index_priv_t   *priv = NULL;
+
+        priv = this->private;
+        if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) {
+                ret = -EINVAL;
+                goto out;
+        }
+
+        ret = __fd_ctx_get (fd, this, &tmpctx);
+        if (!ret) {
+                fctx = (index_fd_ctx_t*) (long) tmpctx;
+                goto out;
+        }
+
+        fctx = GF_CALLOC (1, sizeof (*fctx), gf_index_fd_ctx_t);
+        if (!fctx) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+                             index_dir, sizeof (index_dir));
+        fctx->dir = opendir (index_dir);
+        if (!fctx->dir) {
+                ret = -errno;
+                GF_FREE (fctx);
+                fctx = NULL;
+                goto out;
+        }
+
+        ret = __fd_ctx_set (fd, this, (uint64_t)(long)fctx);
+        if (ret) {
+                /* the context never reached the fd, so releasedir will not
+                 * close the stream for us */
+                closedir (fctx->dir);
+                GF_FREE (fctx);
+                fctx = NULL;
+                ret = -EINVAL;
+                goto out;
+        }
+out:
+        if (fctx)
+                *ctx = fctx;
+        return ret;
+}
+
+/*
+ * Locked wrapper around __index_fd_ctx_get(): takes fd->lock for the
+ * duration of the lookup/creation and passes the result through.
+ */
+int
+index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+        int result = 0;
+
+        LOCK (&fd->lock);
+        result = __index_fd_ctx_get (fd, this, ctx);
+        UNLOCK (&fd->lock);
+
+        return result;
+}
+
+/*
+ * Serialise xattrop/fxattrop calls per inode.
+ *
+ * new != NULL: start a fop -- the stub is appended to the inode's queue.
+ * new == NULL: done processing a fop -- the inode is marked idle.
+ *
+ * In both cases, if no fop is in flight afterwards, the next queued stub is
+ * dequeued under inode->lock and resumed after the lock is dropped.  If the
+ * inode context cannot be obtained while a new stub was passed in, that
+ * stub's frame is unwound with ENOMEM and the stub is destroyed.
+ */
+void
+index_queue_process (xlator_t *this, inode_t *inode, call_stub_t *new)
+{
+ call_stub_t *stub = NULL;
+ index_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ret = __index_inode_ctx_get (inode, this, &ctx);
+ if (ret)
+ goto unlock;
+
+ if (new) {
+ __index_enqueue (&ctx->callstubs, new);
+ /* ownership moved to the queue; also keeps the error branch
+ * below from touching it */
+ new = NULL;
+ } else {
+ ctx->processing = _gf_false;
+ }
+
+ if (!ctx->processing) {
+ stub = __index_dequeue (&ctx->callstubs);
+ if (stub)
+ ctx->processing = _gf_true;
+ else
+ ctx->processing = _gf_false;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ /* new is still set here only when the context lookup failed */
+ if (ret && new) {
+ frame = new->frame;
+ if (new->fop == GF_FOP_XATTROP) {
+ INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM,
+ NULL, NULL);
+ } else if (new->fop == GF_FOP_FXATTROP) {
+ INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM,
+ NULL, NULL);
+ }
+ call_stub_destroy (new);
+ } else if (stub) {
+ call_resume (stub);
+ }
+ return;
+}
+
+/*
+ * Callback of the xattrop wound by index_xattrop_wrapper().  On success the
+ * index is updated from the returned xattr; in all cases the reply is
+ * unwound to the caller and the next queued fop for the inode is started.
+ */
+int32_t
+index_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ /* INDEX_STACK_UNWIND drops the reference kept in frame->local; take an
+ * extra one so the inode is still valid for index_queue_process() */
+ inode = inode_ref (frame->local);
+ if (op_ret < 0)
+ goto out;
+ fop_xattrop_index_action (this, frame->local, xattr);
+out:
+ INDEX_STACK_UNWIND (xattrop, frame, op_ret, op_errno, xattr, xdata);
+ index_queue_process (this, inode, NULL);
+ inode_unref (inode);
+
+ return 0;
+}
+
+/*
+ * Callback of the fxattrop wound by index_fxattrop_wrapper().  On success
+ * the index is updated from the returned xattr; in all cases the reply is
+ * unwound to the caller and the next queued fop for the inode is started.
+ */
+int32_t
+index_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ /* INDEX_STACK_UNWIND drops the reference kept in frame->local; take an
+ * extra one so the inode is still valid for index_queue_process() */
+ inode = inode_ref (frame->local);
+ if (op_ret < 0)
+ goto out;
+
+ fop_fxattrop_index_action (this, frame->local, xattr);
+out:
+ INDEX_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, xattr, xdata);
+ index_queue_process (this, inode, NULL);
+ inode_unref (inode);
+
+ return 0;
+}
+
+/*
+ * Resume function of the stub created in index_xattrop(): winds the xattrop
+ * to the first child with index_xattrop_cbk as callback.  Always returns 0.
+ */
+int
+index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ STACK_WIND (frame, index_xattrop_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr,
+ xdata);
+ return 0;
+}
+
+/*
+ * Resume function of the stub created in index_fxattrop(): winds the
+ * fxattrop to the first child with index_fxattrop_cbk as callback.
+ * Always returns 0.
+ */
+int
+index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ STACK_WIND (frame, index_fxattrop_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr,
+ xdata);
+ return 0;
+}
+
+/*
+ * xattrop entry point.  Operations that index_xattrop_track() does not
+ * select are wound straight to the child.  Tracked operations are wrapped
+ * in a stub and handed to index_queue_process() so that they run one at a
+ * time per inode.  Always returns 0; failures are reported by unwinding.
+ */
+int32_t
+index_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (!index_xattrop_track (loc, flags, dict))
+ goto out;
+
+ /* the reference is released by INDEX_STACK_UNWIND */
+ frame->local = inode_ref (loc->inode);
+ stub = fop_xattrop_stub (frame, index_xattrop_wrapper,
+ loc, flags, dict, xdata);
+ if (!stub) {
+ INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ index_queue_process (this, loc->inode, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+ return 0;
+}
+
+/*
+ * fxattrop entry point.  Operations that index_fxattrop_track() does not
+ * select are wound straight to the child.  Tracked operations are wrapped
+ * in a stub and handed to index_queue_process() so that they run one at a
+ * time per inode.  Always returns 0; failures are reported by unwinding.
+ */
+int32_t
+index_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+        call_stub_t *stub = NULL;
+
+        if (!index_fxattrop_track (fd, flags, dict))
+                goto out;
+
+        /* the reference is released by INDEX_STACK_UNWIND */
+        frame->local = inode_ref (fd->inode);
+        stub = fop_fxattrop_stub (frame, index_fxattrop_wrapper,
+                                  fd, flags, dict, xdata);
+        if (!stub) {
+                /* same error reply as index_xattrop(): the request xdata is
+                 * not echoed back as response data */
+                INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+        index_queue_process (this, fd->inode, stub);
+        return 0;
+out:
+        STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+        return 0;
+}
+
+/*
+ * Worker-side handler for the virtual getxattr keys.  Replies with a
+ * dictionary holding the virtual gfid of the xattrop directory
+ * (GF_XATTROP_INDEX_GFID) or of the base indices holder directory
+ * (GF_BASE_INDICES_HOLDER_GFID).  Failures are unwound with ENOMEM.
+ * Always returns 0.
+ */
+int32_t
+index_getxattr_wrapper (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, const char *name, dict_t *xdata)
+{
+        index_priv_t *priv = this->private;
+        dict_t       *xattr = NULL;
+        int           ret = 0;
+
+        xattr = dict_new ();
+        if (xattr == NULL) {
+                ret = -ENOMEM;
+                goto done;
+        }
+
+        if (strcmp (name, GF_XATTROP_INDEX_GFID) == 0) {
+                ret = dict_set_static_bin (xattr, (char*)name,
+                                           priv->xattrop_vgfid,
+                                           sizeof (priv->xattrop_vgfid));
+        } else if (strcmp (name, GF_BASE_INDICES_HOLDER_GFID) == 0) {
+                ret = dict_set_static_bin (xattr, (char*)name,
+                                      priv->base_indices_holder_vgfid,
+                                      sizeof (priv->base_indices_holder_vgfid));
+        }
+
+        if (ret != 0) {
+                ret = -ENOMEM;
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "xattrop index gfid set failed");
+        }
+done:
+        if (ret) {
+                STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, xattr, xdata);
+        } else {
+                STACK_UNWIND_STRICT (getxattr, frame, 0, 0, xattr, xdata);
+        }
+
+        if (xattr)
+                dict_unref (xattr);
+
+        return 0;
+}
+
+/*
+ * Worker-side handler for lookups on the virtual index directories and on
+ * the entries inside them.
+ *
+ * The path to stat is chosen from loc->gfid / loc->pargfid: the xattrop
+ * directory, a file inside it, the base indices holder directory, or a file
+ * inside that.  Directories answer with their virtual gfid, files get a
+ * freshly generated gfid.  The reply always carries ia_ino = -1.
+ *
+ * Always returns 0; the outcome is reported through the unwind
+ * (op_ret 0, or -1 with op_errno).
+ */
+int32_t
+index_lookup_wrapper (call_frame_t *frame, xlator_t *this,
+                      loc_t *loc, dict_t *xattr_req)
+{
+        index_priv_t *priv = NULL;
+        struct stat   lstatbuf = {0};
+        int           ret = 0;
+        int32_t       op_errno = EINVAL;
+        int32_t       op_ret = -1;
+        char          path[PATH_MAX] = {0};
+        struct iatt   stbuf = {0, };
+        struct iatt   postparent = {0,};
+        dict_t       *xattr = NULL;
+        inode_t      *inode = NULL;
+        gf_boolean_t  is_dir = _gf_false;
+
+        priv = this->private;
+
+        VALIDATE_OR_GOTO (loc, done);
+        /* cached so the unwind below never dereferences a NULL loc */
+        inode = loc->inode;
+
+        if (!uuid_compare (loc->gfid, priv->xattrop_vgfid)) {
+                make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+                                     path, sizeof (path));
+                is_dir = _gf_true;
+        } else if (!uuid_compare (loc->pargfid, priv->xattrop_vgfid)) {
+                make_file_path (priv->index_basepath, XATTROP_SUBDIR,
+                                loc->name, path, sizeof (path));
+        } else if (!uuid_compare (loc->gfid,priv->base_indices_holder_vgfid)){
+                make_index_dir_path (priv->index_basepath,
+                                     BASE_INDICES_HOLDER_SUBDIR, path,
+                                     sizeof (path));
+                is_dir = _gf_true;
+        } else if (!uuid_compare (loc->pargfid, priv->base_indices_holder_vgfid)) {
+                make_file_path (priv->index_basepath,
+                                BASE_INDICES_HOLDER_SUBDIR,loc->name, path,
+                                sizeof (path));
+        }
+
+        ret = lstat (path, &lstatbuf);
+        if (ret) {
+                /* capture errno before logging can overwrite it */
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir "
+                        "(%s)", strerror (op_errno));
+                goto done;
+        } else if (!S_ISDIR (lstatbuf.st_mode) && is_dir) {
+                gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir, "
+                        "not a directory");
+                op_errno = ENOENT;
+                goto done;
+        }
+        xattr = dict_new ();
+        if (!xattr) {
+                op_errno = ENOMEM;
+                goto done;
+        }
+
+        iatt_from_stat (&stbuf, &lstatbuf);
+        if (is_dir && !uuid_compare (loc->gfid, priv->xattrop_vgfid)) {
+                uuid_copy (stbuf.ia_gfid, priv->xattrop_vgfid);
+        } else if (is_dir &&
+                   !uuid_compare (loc->gfid, priv->base_indices_holder_vgfid)) {
+                uuid_copy (stbuf.ia_gfid, priv->base_indices_holder_vgfid);
+        } else {
+                uuid_generate (stbuf.ia_gfid);
+        }
+        stbuf.ia_ino = -1;
+        op_ret = 0;
+done:
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+                             inode, &stbuf, xattr, &postparent);
+        if (xattr)
+                dict_unref (xattr);
+        return 0;
+}
+
+/*
+ * Worker-side readdir on the virtual base indices holder directory.  The
+ * directory is opened for this call only, read through
+ * index_fill_readdir() and closed again.
+ *
+ * Always returns 0; the number of entries (or -1 with op_errno) is reported
+ * through the unwind.  errno as left by index_fill_readdir() is passed on
+ * as op_errno, which lets ENOENT signal end-of-directory.
+ */
+int32_t
+base_indices_readdir_wrapper (call_frame_t *frame, xlator_t *this,
+                              fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+        index_priv_t *priv = NULL;
+        char          base_indices_holder[PATH_MAX] = {0};
+        DIR          *dir = NULL;
+        int32_t       op_ret = -1;
+        int32_t       op_errno = 0;
+        int           count = 0;
+        gf_dirent_t   entries;
+
+        /* must happen before the first jump to done: the list is unwound
+         * and freed there on every path */
+        INIT_LIST_HEAD (&entries.list);
+
+        priv = this->private;
+
+        make_index_dir_path (priv->index_basepath, BASE_INDICES_HOLDER_SUBDIR,
+                             base_indices_holder, sizeof (base_indices_holder));
+
+        dir = opendir (base_indices_holder);
+        if (!dir) {
+                op_errno = EINVAL;
+                goto done;
+        }
+
+        count = index_fill_readdir (fd, dir, off, size, &entries,
+                                    BASE_INDICES_HOLDER);
+        /* pick ENOENT to indicate EOF */
+        op_errno = errno;
+        op_ret = count;
+        closedir (dir);
+done:
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata);
+        gf_dirent_free (&entries);
+        return 0;
+}
+
+/*
+ * Worker-side readdir on the virtual xattrop directory.  Uses the DIR
+ * stream kept in the fd context and fills the reply through
+ * index_fill_readdir() in INDEX_XATTROP mode.
+ *
+ * Always returns 0; the number of entries (or -1 with op_errno) is reported
+ * through the unwind.
+ */
+int32_t
+index_readdir_wrapper (call_frame_t *frame, xlator_t *this,
+                       fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+        index_fd_ctx_t *fctx = NULL;
+        DIR            *dir = NULL;
+        int             ret = -1;
+        int32_t         op_ret = -1;
+        int32_t         op_errno = 0;
+        gf_dirent_t     entries;
+
+        INIT_LIST_HEAD (&entries.list);
+
+        ret = index_fd_ctx_get (fd, this, &fctx);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "pfd is NULL, fd=%p", fd);
+                op_errno = -ret;
+                goto done;
+        }
+
+        dir = fctx->dir;
+        if (dir == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dir is NULL for fd=%p", fd);
+                op_errno = EINVAL;
+                goto done;
+        }
+
+        op_ret = index_fill_readdir (fd, dir, off, size, &entries,
+                                     INDEX_XATTROP);
+
+        /* pick ENOENT to indicate EOF */
+        op_errno = errno;
+done:
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata);
+        gf_dirent_free (&entries);
+        return 0;
+}
+
+/*
+ * Worker-side unlink of an entry in the virtual xattrop directory.
+ * loc->name is parsed as a gfid and removed from XATTROP_SUBDIR through
+ * index_del().  The directory is stat'ed before and after to fill the
+ * pre/post parent attributes, both of which carry the virtual gfid and
+ * ia_ino = -1.
+ *
+ * A name that is not a valid uuid string is rejected with EINVAL.
+ * Always returns 0; the outcome is reported through the unwind.
+ */
+int
+index_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+                      dict_t *xdata)
+{
+        index_priv_t *priv = NULL;
+        int32_t       op_ret = 0;
+        int32_t       op_errno = 0;
+        int           ret = 0;
+        struct iatt   preparent = {0};
+        struct iatt   postparent = {0};
+        char          index_dir[PATH_MAX] = {0};
+        struct stat   lstatbuf = {0};
+        uuid_t        gfid = {0};
+
+        priv = this->private;
+        make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+                             index_dir, sizeof (index_dir));
+        ret = lstat (index_dir, &lstatbuf);
+        if (ret < 0) {
+                op_ret = -1;
+                op_errno = errno;
+                goto done;
+        }
+
+        iatt_from_stat (&preparent, &lstatbuf);
+        uuid_copy (preparent.ia_gfid, priv->xattrop_vgfid);
+        preparent.ia_ino = -1;
+
+        /* without this check an unparsable name leaves gfid null and
+         * index_del() is asked to remove a null gfid */
+        if (uuid_parse (loc->name, gfid)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto done;
+        }
+
+        ret = index_del (this, gfid, XATTROP_SUBDIR);
+        if (ret < 0) {
+                op_ret = -1;
+                op_errno = -ret;
+                goto done;
+        }
+        memset (&lstatbuf, 0, sizeof (lstatbuf));
+        ret = lstat (index_dir, &lstatbuf);
+        if (ret < 0) {
+                op_ret = -1;
+                op_errno = errno;
+                goto done;
+        }
+        iatt_from_stat (&postparent, &lstatbuf);
+        uuid_copy (postparent.ia_gfid, priv->xattrop_vgfid);
+        postparent.ia_ino = -1;
+done:
+        INDEX_STACK_UNWIND (unlink, frame, op_ret, op_errno, &preparent,
+                            &postparent, xdata);
+        return 0;
+}
+
+/*
+ * getxattr entry point.  Requests for GF_XATTROP_INDEX_GFID or
+ * GF_BASE_INDICES_HOLDER_GFID are queued for the worker thread
+ * (index_getxattr_wrapper); anything else, including a NULL name, is wound
+ * to the child unchanged.  Always returns 0.
+ */
+int32_t
+index_getxattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, const char *name, dict_t *xdata)
+{
+        call_stub_t *stub = NULL;
+
+        if (!name ||
+            (strcmp (GF_XATTROP_INDEX_GFID, name) &&
+             strcmp (GF_BASE_INDICES_HOLDER_GFID, name)))
+                goto out;
+
+        stub = fop_getxattr_stub (frame, index_getxattr_wrapper, loc, name,
+                                  xdata);
+        if (stub == NULL) {
+                STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+        worker_enqueue (this, stub);
+        return 0;
+out:
+        STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+        return 0;
+}
+
+/*
+ * lookup entry point.  A lookup is handled by the worker thread
+ * (index_lookup_wrapper) when loc->gfid or loc->pargfid matches the virtual
+ * gfid of the xattrop directory or of the base indices holder directory;
+ * every other lookup is wound to the child.  Always returns 0.
+ */
+int32_t
+index_lookup (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *xattr_req)
+{
+        call_stub_t  *stub = NULL;
+        index_priv_t *priv = this->private;
+        gf_boolean_t  on_virtual_dir = _gf_false;
+
+        if (!uuid_compare (loc->gfid, priv->xattrop_vgfid) ||
+            !uuid_compare (loc->pargfid, priv->xattrop_vgfid) ||
+            !uuid_compare (loc->gfid, priv->base_indices_holder_vgfid) ||
+            !uuid_compare (loc->pargfid, priv->base_indices_holder_vgfid))
+                on_virtual_dir = _gf_true;
+
+        if (!on_virtual_dir)
+                goto normal;
+
+        stub = fop_lookup_stub (frame, index_lookup_wrapper, loc, xattr_req);
+        if (stub == NULL) {
+                STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, loc->inode,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+        worker_enqueue (this, stub);
+        return 0;
+normal:
+        STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+        return 0;
+}
+
+/*
+ * readdir entry point.  Reads on the virtual xattrop directory are queued
+ * for index_readdir_wrapper, reads on the virtual base indices holder
+ * directory for base_indices_readdir_wrapper; all other reads are wound to
+ * the child.  Always returns 0.
+ */
+int32_t
+index_readdir (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+        call_stub_t  *stub = NULL;
+        index_priv_t *priv = this->private;
+
+        if (!uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) {
+                stub = fop_readdir_stub (frame, index_readdir_wrapper, fd, size,
+                                         off, xdata);
+        } else if (!uuid_compare (fd->inode->gfid,
+                                  priv->base_indices_holder_vgfid)) {
+                stub = fop_readdir_stub (frame, base_indices_readdir_wrapper,
+                                         fd, size, off, xdata);
+        } else {
+                /* not one of the virtual directories */
+                goto out;
+        }
+
+        if (stub == NULL) {
+                STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+        worker_enqueue (this, stub);
+        return 0;
+out:
+        STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+        return 0;
+}
+
+/*
+ * unlink entry point.  Unlinks whose parent is the virtual xattrop
+ * directory are queued for the worker thread (index_unlink_wrapper); all
+ * others are wound to the child.  Always returns 0.
+ */
+int
+index_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+              dict_t *xdata)
+{
+        call_stub_t  *stub = NULL;
+        index_priv_t *priv = this->private;
+
+        if (uuid_compare (loc->pargfid, priv->xattrop_vgfid) != 0)
+                goto out;
+
+        stub = fop_unlink_stub (frame, index_unlink_wrapper, loc, xflag, xdata);
+        if (stub == NULL) {
+                STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL,
+                                     NULL);
+                return 0;
+        }
+        worker_enqueue (this, stub);
+        return 0;
+out:
+        STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+        return 0;
+}
+
+/*
+ * Set up memory accounting for this translator with gf_index_mt_end + 1
+ * types.  Returns whatever xlator_mem_acct_init() returns.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        return xlator_mem_acct_init (this, gf_index_mt_end + 1);
+}
+
+/*
+ * Translator constructor.
+ *
+ * Requires exactly one child.  Allocates the private structure, sets up its
+ * lock, condition variable and mutex, reads the "index-base" option,
+ * generates the base index uuid and the two virtual directory gfids, and
+ * starts the worker thread (index_worker) with a stack of
+ * INDEX_THREAD_STACK_SIZE bytes when that size is accepted.
+ *
+ * Returns 0 on success and non-zero on failure; on failure everything that
+ * was set up is torn down again and this->private is NULL.
+ */
+int
+init (xlator_t *this)
+{
+        int             ret = -1;
+        index_priv_t   *priv = NULL;
+        pthread_t       thread;
+        pthread_attr_t  w_attr;
+        gf_boolean_t    lock_inited = _gf_false;
+        gf_boolean_t    mutex_inited = _gf_false;
+        gf_boolean_t    cond_inited = _gf_false;
+        gf_boolean_t    attr_inited = _gf_false;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'index' not configured with exactly one child");
+                goto out;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_index_mt_priv_t);
+        if (!priv)
+                goto out;
+
+        LOCK_INIT (&priv->lock);
+        lock_inited = _gf_true;
+
+        if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pthread_cond_init failed (%d)", ret);
+                goto out;
+        }
+        cond_inited = _gf_true;
+
+        if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pthread_mutex_init failed (%d)", ret);
+                goto out;
+        }
+        mutex_inited = _gf_true;
+
+        if ((ret = pthread_attr_init (&w_attr)) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pthread_attr_init failed (%d)", ret);
+                goto out;
+        }
+        attr_inited = _gf_true;
+
+        ret = pthread_attr_setstacksize (&w_attr, INDEX_THREAD_STACK_SIZE);
+        if (ret == EINVAL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Using default thread stack size");
+        }
+
+        /* NOTE(review): GF_OPTION_INIT presumably jumps to out without
+         * touching ret when the option is invalid -- make sure that path
+         * is reported as a failure */
+        ret = -1;
+        GF_OPTION_INIT ("index-base", priv->index_basepath, path, out);
+        uuid_generate (priv->index);
+        uuid_generate (priv->xattrop_vgfid);
+        /*base_indices_holder is a directory which contains hard links to
+         * all base indices inside indices/xattrop directory*/
+        uuid_generate (priv->base_indices_holder_vgfid);
+        INIT_LIST_HEAD (&priv->callstubs);
+
+        /* must be set before the worker starts: it is handed this */
+        this->private = priv;
+        ret = gf_thread_create (&thread, &w_attr, index_worker, this);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING, "Failed to create "
+                        "worker thread, aborting");
+                goto out;
+        }
+        ret = 0;
+out:
+        if (ret) {
+                if (cond_inited)
+                        pthread_cond_destroy (&priv->cond);
+                if (mutex_inited)
+                        pthread_mutex_destroy (&priv->mutex);
+                if (lock_inited)
+                        LOCK_DESTROY (&priv->lock);
+                if (priv)
+                        GF_FREE (priv);
+                this->private = NULL;
+        }
+        if (attr_inited)
+                pthread_attr_destroy (&w_attr);
+        return ret;
+}
+
+/*
+ * Translator destructor: detaches the private structure from the
+ * translator, destroys its lock, condition variable and mutex and frees it.
+ * Does nothing when there is no private structure.
+ */
+void
+fini (xlator_t *this)
+{
+        index_priv_t *priv = this->private;
+
+        if (priv == NULL)
+                return;
+
+        this->private = NULL;
+        LOCK_DESTROY (&priv->lock);
+        pthread_cond_destroy (&priv->cond);
+        pthread_mutex_destroy (&priv->mutex);
+        GF_FREE (priv);
+}
+
+/*
+ * forget callback: removes this translator's context from the inode and
+ * frees it when one was stored.  Always returns 0.
+ */
+int
+index_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t raw_ctx = 0;
+
+        if (inode_ctx_del (inode, this, &raw_ctx) == 0)
+                GF_FREE ((index_inode_ctx_t*) (long)raw_ctx);
+
+        return 0;
+}
+
+/*
+ * releasedir callback: removes this translator's context from the fd,
+ * closes the DIR stream it holds (if any) and frees it.  Always returns 0.
+ */
+int32_t
+index_releasedir (xlator_t *this, fd_t *fd)
+{
+        index_fd_ctx_t *fctx = NULL;
+        uint64_t        raw_ctx = 0;
+
+        if (fd_ctx_del (fd, this, &raw_ctx) < 0)
+                return 0;
+
+        fctx = (index_fd_ctx_t*) (long) raw_ctx;
+        if (fctx->dir)
+                closedir (fctx->dir);
+        GF_FREE (fctx);
+
+        return 0;
+}
+
+/*
+ * release callback: removes this translator's context from the fd and
+ * frees it.  Always returns 0.
+ */
+int32_t
+index_release (xlator_t *this, fd_t *fd)
+{
+        uint64_t raw_ctx = 0;
+
+        if (fd_ctx_del (fd, this, &raw_ctx) < 0)
+                return 0;
+
+        GF_FREE ((index_fd_ctx_t*) (long) raw_ctx);
+
+        return 0;
+}
+
+/* Event notification: nothing index specific, defer to default_notify(). */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+        return default_notify (this, event, data);
+}
+
+/*
+ * File operations implemented by the index translator.  xattrop/fxattrop
+ * maintain the index; the remaining entries expose the virtual index
+ * directories.
+ */
+struct xlator_fops fops = {
+ .xattrop = index_xattrop,
+ .fxattrop = index_fxattrop,
+
+ //interface functions follow
+ .getxattr = index_getxattr,
+ .lookup = index_lookup,
+ .readdir = index_readdir,
+ .unlink = index_unlink
+};
+
+/* No statedump handlers are set in this definition. */
+struct xlator_dumpops dumpops;
+
+/* Callbacks that free the per-inode and per-fd contexts of this translator. */
+struct xlator_cbks cbks = {
+ .forget = index_forget,
+ .release = index_release,
+ .releasedir = index_releasedir
+};
+
+/* Volume options accepted by the translator; the list ends with a NULL key. */
+struct volume_options options[] = {
+ { .key = {"index-base" },
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "path where the index files need to be stored",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
new file mode 100644
index 000000000..d6dcb1c23
--- /dev/null
+++ b/xlators/features/index/src/index.h
@@ -0,0 +1,73 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INDEX_H__
+#define __INDEX_H__
+
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "index-mem-types.h"
+
+/* Stack size requested for the index worker thread: 1 MiB. */
+#define INDEX_THREAD_STACK_SIZE ((size_t)(1024*1024))
+
+/*
+ * Cached knowledge about whether an inode's gfid is in the xattrop index.
+ * UNKNOWN is the zero value, so it is what a zero-initialised context
+ * starts with.
+ */
+typedef enum {
+ UNKNOWN,
+ IN,
+ NOTIN
+} index_state_t;
+
+/* Per-inode context: serialises xattrops and caches the index state. */
+typedef struct index_inode_ctx {
+ gf_boolean_t processing; /* a queued fop is currently in flight */
+ struct list_head callstubs; /* fops waiting for their turn */
+ index_state_t state; /* see index_state_t */
+} index_inode_ctx_t;
+
+/* Per-fd context: directory stream backing a virtual directory fd. */
+typedef struct index_fd_ctx {
+ DIR *dir;
+} index_fd_ctx_t;
+
+/* Progress of mirroring the base index files into base_indices_holder. */
+typedef enum {
+ sync_not_started,
+ sync_started,
+ synced_state,
+} to_be_healed_states_t;
+
+/* Which virtual directory a readdir is being served from. */
+typedef enum {
+ INDEX_XATTROP,
+ BASE_INDICES_HOLDER,
+} readdir_directory;
+
+/* Private state of one index translator instance. */
+typedef struct index_priv {
+ char *index_basepath; /* value of the "index-base" option */
+ uuid_t index; /* uuid of the current base index file */
+ gf_lock_t lock;
+ uuid_t xattrop_vgfid;//virtual gfid of the xattrop index dir
+ uuid_t base_indices_holder_vgfid; //virtual gfid of the
+ //base_indices_holder directory
+ struct list_head callstubs;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ to_be_healed_states_t to_be_healed_states; /* base indices sync progress */
+} index_priv_t;
+
+/*
+ * Unwind a fop and release the inode reference kept in frame->local.
+ * frame->local is cleared before STACK_UNWIND_STRICT runs.
+ * NOTE(review): inode_unref() is called even when frame->local is NULL
+ * (e.g. from the unlink path, which never sets it) -- presumably it accepts
+ * NULL; confirm.
+ */
+#define INDEX_STACK_UNWIND(fop, frame, params ...) \
+do { \
+ if (frame) { \
+ inode_t *_inode = frame->local; \
+ frame->local = NULL; \
+ inode_unref (_inode); \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+} while (0)
+
+#endif
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
index 53dd3aa5d..0f79731b4 100644
--- a/xlators/features/locks/src/Makefile.am
+++ b/xlators/features/locks/src/Makefile.am
@@ -1,15 +1,18 @@
xlator_LTLIBRARIES = locks.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-locks_la_LDFLAGS = -module -avoidversion
+locks_la_LDFLAGS = -module -avoid-version
-locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c
-locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \
+ clear.c
+locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = locks.h common.h locks-mem-types.h
+noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
@@ -17,4 +20,4 @@ uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/posix-locks.so
install-data-hook:
- ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so \ No newline at end of file
+ ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
new file mode 100644
index 000000000..124b9ad0f
--- /dev/null
+++ b/xlators/features/locks/src/clear.c
@@ -0,0 +1,424 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+#include "common.h"
+#include "statedump.h"
+#include "clear.h"
+
+/*
+ * Translate a textual lock kind ("blocked", "granted" or "all") into the
+ * matching clrlk_kind value. Returns CLRLK_KIND_MAX when the string is not
+ * recognised.
+ */
+int
+clrlk_get_kind (char *kind)
+{
+        /* slot 0 is a placeholder; the search starts at CLRLK_BLOCKED */
+        char *names[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted",
+                                       "all"};
+        int   idx                   = CLRLK_BLOCKED;
+
+        while (idx < CLRLK_KIND_MAX) {
+                if (strcmp (names[idx], kind) == 0)
+                        return idx;
+                idx++;
+        }
+
+        return CLRLK_KIND_MAX;
+}
+
+/*
+ * Translate a textual lock type ("inode", "entry" or "posix") into the
+ * matching clrlk_type value. Returns CLRLK_TYPE_MAX when the string is not
+ * recognised.
+ */
+int
+clrlk_get_type (char *type)
+{
+        char *names[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"};
+        int   idx                   = CLRLK_INODE;
+
+        while (idx < CLRLK_TYPE_MAX) {
+                if (strcmp (names[idx], type) == 0)
+                        return idx;
+                idx++;
+        }
+
+        return CLRLK_TYPE_MAX;
+}
+
+/*
+ * Parse an optional "<whence>,<start>-<len>" string into ulock.
+ *
+ * range_str == NULL means "no range filter": *chk_range is set to
+ * _gf_false and 0 is returned. A string that parses into all three fields
+ * sets *chk_range to _gf_true and returns 0. Returns -1 when chk_range is
+ * NULL or the string does not yield exactly three fields.
+ */
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+                      gf_boolean_t *chk_range)
+{
+        int parsed = 0;
+
+        if (chk_range == NULL)
+                return -1;
+
+        /* nothing to parse: the caller should not filter by range */
+        if (range_str == NULL) {
+                *chk_range = _gf_false;
+                return 0;
+        }
+
+        parsed = sscanf (range_str, "%hd,%"PRId64"-""%"PRId64,
+                         &ulock->l_whence, &ulock->l_start, &ulock->l_len);
+        if (parsed != 3)
+                return -1;
+
+        *chk_range = _gf_true;
+        return 0;
+}
+
+/*
+ * Parse a clear-locks command string of the form
+ *     <GF_XATTR_CLRLK_CMD>.t<type>.k<kind>[.<opts>]
+ * into args.
+ *
+ * Both the type and the kind keyword are mandatory and must appear in that
+ * order. <opts> is optional and type specific; it is cut at the first "/".
+ *
+ * On success args->type and args->kind hold the parsed values and, when
+ * <opts> is present, args->opts holds a gf_strdup()'ed copy (this function
+ * never frees it; the caller is expected to).
+ *
+ * Returns 0 on success, -1 on malformed input or allocation failure.
+ */
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args)
+{
+        char            *opts           = NULL;
+        char            *cur            = NULL;
+        char            *tok            = NULL;
+        char            *sptr           = NULL;
+        char            *free_ptr       = NULL;
+        char            kw[KW_MAX]     = {[KW_TYPE] = 't',
+                                          [KW_KIND] = 'k',
+                                          };
+        int             ret             = -1;
+        int             i               = 0;
+
+        GF_ASSERT (cmd);
+        /* GF_ASSERT may only log in production builds, so guard explicitly */
+        if (!cmd || !args)
+                goto out;
+
+        /* +1 so the scratch buffer always has room for the trailing NUL */
+        free_ptr = opts = GF_CALLOC (1, strlen (cmd) + 1, gf_common_mt_char);
+        if (!opts)
+                goto out;
+
+        if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1)
+                goto out;
+
+        /*clr_lk_prefix.ttype.kkind.args, args - type specific*/
+        cur = opts;
+        for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr));
+             cur = NULL, i++) {
+                if (tok[0] != kw[i])
+                        goto out;
+                if (i == KW_TYPE)
+                        args->type = clrlk_get_type (tok+1);
+                if (i == KW_KIND)
+                        args->kind = clrlk_get_kind (tok+1);
+        }
+
+        /* the loop ended early: a mandatory keyword is missing, so
+         * args->type/args->kind were not all parsed from the command */
+        if (i < KW_MAX)
+                goto out;
+
+        if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX))
+                goto out;
+
+        /*optional args, neither range nor basename can 'legally' contain
+         * "/" in them*/
+        tok = strtok_r (NULL, "/", &sptr);
+        if (tok) {
+                args->opts = gf_strdup (tok);
+                if (!args->opts)
+                        goto out;
+        }
+
+        ret = 0;
+out:
+        GF_FREE (free_ptr);
+        return ret;
+}
+
+/*
+ * Clear posix (fcntl) locks held on pl_inode.
+ *
+ * args->kind selects blocked and/or granted locks; args->opts, when set, is
+ * a "<whence>,<start>-<len>" range and only locks whose user_flock matches
+ * it exactly are cleared. Blocked locks that get cleared are unwound with
+ * -1/EAGAIN. Afterwards grant_blocked_locks() is run on the inode.
+ *
+ * *blkd and *granted receive the number of blocked/granted locks cleared.
+ * Returns 0 on success; -1 with *op_errno = EINVAL if the range is invalid.
+ */
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+                     int *blkd, int *granted, int *op_errno)
+{
+        posix_lock_t            *plock          = NULL;
+        posix_lock_t            *tmp            = NULL;
+        struct gf_flock         ulock           = {0, };
+        int                     ret             = -1;
+        int                     bcount          = 0;
+        int                     gcount          = 0;
+        gf_boolean_t            chk_range       = _gf_false;
+        struct list_head        released;
+
+        INIT_LIST_HEAD (&released);
+        if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+                *op_errno = EINVAL;
+                goto out;
+        }
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list,
+                                          list) {
+                        if ((plock->blocked &&
+                             !(args->kind & CLRLK_BLOCKED)) ||
+                            (!plock->blocked &&
+                             !(args->kind & CLRLK_GRANTED)))
+                                continue;
+
+                        if (chk_range &&
+                            (plock->user_flock.l_whence != ulock.l_whence
+                            || plock->user_flock.l_start != ulock.l_start
+                            || plock->user_flock.l_len != ulock.l_len))
+                                continue;
+
+                        list_del_init (&plock->list);
+                        if (plock->blocked) {
+                                /* defer the unwind until the mutex is
+                                 * dropped, as the inodelk/entrylk variants
+                                 * do: unwinding may re-enter this xlator */
+                                bcount++;
+                                list_add_tail (&plock->list, &released);
+                                continue;
+                        }
+
+                        gcount++;
+                        GF_FREE (plock);
+                }
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        /* the locks on 'released' are private to this call by now */
+        list_for_each_entry_safe (plock, tmp, &released, list) {
+                list_del_init (&plock->list);
+                pl_trace_out (this, plock->frame, NULL, NULL,
+                              F_SETLKW, &plock->user_flock,
+                              -1, EAGAIN, NULL);
+
+                STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN,
+                                     &plock->user_flock, NULL);
+                GF_FREE (plock);
+        }
+
+        grant_blocked_locks (this, pl_inode);
+        ret = 0;
+out:
+        *blkd    = bcount;
+        *granted = gcount;
+        return ret;
+}
+
+/*
+ * Clear inode locks of one domain (dom) on pl_inode.
+ *
+ * args->kind selects the blocked and/or granted pass; args->opts, when set,
+ * is a "<whence>,<start>-<len>" range and only locks whose user_flock
+ * matches it exactly are cleared. Cleared blocked locks are unwound with
+ * -1/EAGAIN. *blkd and *granted receive the number of locks cleared.
+ *
+ * Returns 0 on success and -1 on failure (range could not be parsed, in
+ * which case *op_errno is set to EINVAL).
+ */
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_inode_lock_t *ilock = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ /* Pick the first pass to run. If neither bit is set in args->kind,
+ * control simply falls through into the blkd pass below. */
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ /* Under the inode mutex, only move matching blocked locks onto the
+ * private 'released' list; the unwinding happens after unlocking. */
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks,
+ blocked_locks) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ bcount++;
+ list_del_init (&ilock->blocked_locks);
+ list_add (&ilock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) {
+ list_del_init (&ilock->blocked_locks);
+ pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN,
+ ilock->volume);
+ STACK_UNWIND_STRICT (inodelk, ilock->frame, -1,
+ EAGAIN, NULL);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ /* Stop here unless granted locks were requested as well. */
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ /* 'released' is empty again at this point (or was never filled), so
+ * it is reused, this time linked through the 'list' member. */
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list,
+ list) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ gcount++;
+ list_del_init (&ilock->list);
+ list_add (&ilock->list, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ilock, tmp, &released, list) {
+ list_del_init (&ilock->list);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ ret = 0;
+out:
+ /* Reached on every exit path, including the range-parse failure. */
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/*
+ * Clear entry locks of one domain (dom) on pl_inode.
+ *
+ * args->kind selects the blocked and/or granted pass; args->opts, when set,
+ * is a basename and only locks whose basename is equal to it are cleared.
+ * Cleared blocked locks are unwound with -1/EAGAIN and freed here. *blkd
+ * and *granted receive the number of locks cleared.
+ *
+ * Declared to return 0 on success and -1 on failure; as written every path
+ * sets ret to 0, and op_errno is never written.
+ */
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_entry_lock_t *elock = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ int bcount = 0;
+ int gcount = 0;
+ int ret = -1;
+ struct list_head removed;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ /* Pick the first pass to run. If neither bit is set in args->kind,
+ * control simply falls through into the blkd pass below. */
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ /* Under the inode mutex, only move matching blocked locks onto the
+ * private 'released' list; the unwinding happens after unlocking. */
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ bcount++;
+
+ list_del_init (&elock->blocked_locks);
+ list_add_tail (&elock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (elock, tmp, &released, blocked_locks) {
+ list_del_init (&elock->blocked_locks);
+ entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type,
+ -1, EAGAIN);
+ STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL);
+ GF_FREE ((char *) elock->basename);
+ GF_FREE (elock->connection_id);
+ GF_FREE (elock);
+ }
+
+ /* Stop here unless granted locks were requested as well. */
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ /* 'removed' is initialised after the label so that both the direct
+ * jump and the fall-through from the blocked pass set it up. */
+ INIT_LIST_HEAD (&removed);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->entrylk_list,
+ domain_list) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ gcount++;
+ list_del_init (&elock->domain_list);
+ list_add_tail (&elock->domain_list, &removed);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ /* NOTE(review): the removed locks are neither unlinked nor freed in
+ * this function; presumably grant_blocked_entry_locks() releases the
+ * lock it is handed - verify against entrylk.c. */
+ list_for_each_entry_safe (elock, tmp, &removed, domain_list) {
+ grant_blocked_entry_locks (this, pl_inode, elock, dom);
+ }
+
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/*
+ * Run the inode-lock or entry-lock clearing routine (chosen by args->type)
+ * over every domain attached to pl_inode, adding each domain's counts to
+ * *blkd and *granted. Any other type leaves the counters untouched.
+ *
+ * Returns 0 when all domains were processed (or there are none); otherwise
+ * the non-zero result of the first failing per-domain call.
+ */
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+                                clrlk_args *args, int *blkd, int *granted,
+                                int *op_errno)
+{
+        pl_dom_list_t   *domain         = NULL;
+        int             rc              = 0;
+        int             dom_blkd        = 0;
+        int             dom_granted     = 0;
+
+        /* an empty domain list makes the loop a no-op and yields 0 */
+        list_for_each_entry (domain, &pl_inode->dom_list, inode_list) {
+                dom_blkd    = 0;
+                dom_granted = 0;
+                rc          = 0;
+
+                if (args->type == CLRLK_INODE)
+                        rc = clrlk_clear_inodelk (this, pl_inode, domain,
+                                                  args, &dom_blkd,
+                                                  &dom_granted, op_errno);
+                else if (args->type == CLRLK_ENTRY)
+                        rc = clrlk_clear_entrylk (this, pl_inode, domain,
+                                                  args, &dom_blkd,
+                                                  &dom_granted, op_errno);
+
+                /* the failing domain's counts are not accumulated */
+                if (rc)
+                        return rc;
+
+                *blkd    += dom_blkd;
+                *granted += dom_granted;
+        }
+
+        return 0;
+}
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
new file mode 100644
index 000000000..511f3f74a
--- /dev/null
+++ b/xlators/features/locks/src/clear.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __CLEAR_H__
+#define __CLEAR_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "compat-errno.h"
+#include "stack.h"
+#include "call-stub.h"
+#include "locks.h"
+
+/*
+ * Lock families a clear-locks request can name. CLRLK_TYPE_MAX is the
+ * number of valid values; clrlk_get_type() also returns it for an
+ * unrecognised name.
+ */
+typedef enum {
+ CLRLK_INODE,
+ CLRLK_ENTRY,
+ CLRLK_POSIX,
+ CLRLK_TYPE_MAX
+} clrlk_type;
+
+/*
+ * Which locks to clear. Values start at 1 so that CLRLK_ALL (3) equals
+ * CLRLK_BLOCKED | CLRLK_GRANTED; the clearing code tests the value with
+ * bitwise AND. CLRLK_KIND_MAX is returned by clrlk_get_kind() for an
+ * unrecognised name.
+ */
+typedef enum {
+ CLRLK_BLOCKED = 1,
+ CLRLK_GRANTED,
+ CLRLK_ALL,
+ CLRLK_KIND_MAX
+} clrlk_kind;
+
+/*
+ * Positions of the keyword tokens in a clear-locks command, in the order
+ * they must appear. KW_MAX is the number of keywords.
+ */
+typedef enum {
+ KW_TYPE,
+ KW_KIND,
+ /*add new keywords here*/
+ KW_MAX
+} clrlk_opts;
+
+/* Parsed form of a clear-locks command. */
+struct _clrlk_args;
+typedef struct _clrlk_args clrlk_args;
+
+struct _clrlk_args {
+ int type; /* a clrlk_type value */
+ int kind; /* a clrlk_kind value */
+ char *opts; /* optional type-specific argument, NULL when absent */
+};
+
+/* Map a kind name to its clrlk_kind value (CLRLK_KIND_MAX if unknown). */
+int
+clrlk_get_kind (char *kind);
+int
+clrlk_get_type (char *type);
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range);
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args);
+
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno);
+#endif /* __CLEAR_H__ */
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 4e089a0ee..b3309580d 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
@@ -45,8 +35,9 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock);
static int
pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
posix_lock_t *old_lock);
+
static pl_dom_list_t *
-allocate_domain (const char *volume)
+__allocate_domain (const char *volume)
{
pl_dom_list_t *dom = NULL;
@@ -85,24 +76,26 @@ get_domain (pl_inode_t *pl_inode, const char *volume)
{
pl_dom_list_t *dom = NULL;
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, pl_inode, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, volume, out);
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- if (strcmp (dom->domain, volume) == 0)
- goto found;
+ GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", volume, out);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (strcmp (dom->domain, volume) == 0)
+ goto unlock;
+ }
+ dom = __allocate_domain (volume);
+ if (dom)
+ list_add (&dom->inode_list, &pl_inode->dom_list);
}
-
- dom = allocate_domain (volume);
- if (dom)
- list_add (&dom->inode_list, &pl_inode->dom_list);
-found:
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
if (dom) {
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Domain %s found", volume);
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume);
} else {
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Domain %s not found", volume);
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume);
}
out:
return dom;
@@ -143,10 +136,10 @@ __pl_inode_is_empty (pl_inode_t *pl_inode)
void
pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame)
{
- snprintf (str, size, "Pid=%llu, lk-owner=%llu, Transport=%p, Frame=%llu",
+ snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu",
(unsigned long long) frame->root->pid,
- (unsigned long long) frame->root->lk_owner,
- (void *)frame->root->trans,
+ lkowner_utoa (&frame->root->lk_owner),
+ frame->root->client,
(unsigned long long) frame->root->unique);
}
@@ -176,18 +169,17 @@ pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc)
ipath = NULL;
}
- snprintf (str, size, "ino=%llu, fd=%p, path=%s",
- (unsigned long long) inode->ino, fd,
+ snprintf (str, size, "gfid=%s, fd=%p, path=%s",
+ uuid_utoa (inode->gfid), fd,
ipath ? ipath : "<nul>");
- if (ipath)
- GF_FREE (ipath);
+ GF_FREE (ipath);
}
void
pl_print_lock (char *str, int size, int cmd,
- struct gf_flock *flock, uint64_t owner)
+ struct gf_flock *flock, gf_lkowner_t *owner)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -235,11 +227,11 @@ pl_print_lock (char *str, int size, int cmd,
}
snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
cmd_str, type_str, (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner);
+ lkowner_utoa (owner));
}
@@ -262,7 +254,7 @@ pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
gf_log (this->name, GF_LOG_INFO,
"[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
@@ -312,7 +304,7 @@ pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
pl_print_verdict (verdict, 32, op_ret, op_errno);
@@ -342,7 +334,7 @@ pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
gf_log (this->name, GF_LOG_INFO,
"[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
@@ -400,6 +392,7 @@ pl_update_refkeeper (xlator_t *this, inode_t *inode)
pl_inode_t *pl_inode = NULL;
int is_empty = 0;
int need_unref = 0;
+ int need_ref = 0;
pl_inode = pl_inode_get (this, inode);
@@ -413,13 +406,17 @@ pl_update_refkeeper (xlator_t *this, inode_t *inode)
}
if (!is_empty && !pl_inode->refkeeper) {
- pl_inode->refkeeper = inode_ref (inode);
+ need_ref = 1;
+ pl_inode->refkeeper = inode;
}
}
pthread_mutex_unlock (&pl_inode->mutex);
if (need_unref)
inode_unref (inode);
+
+ if (need_ref)
+ inode_ref (inode);
}
@@ -430,46 +427,50 @@ pl_inode_get (xlator_t *this, inode_t *inode)
pl_inode_t *pl_inode = NULL;
int ret = 0;
- ret = inode_ctx_get (inode, this,&tmp_pl_inode);
- if (ret == 0) {
- pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- goto out;
- }
- pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
- gf_locks_mt_pl_inode_t);
- if (!pl_inode) {
- goto out;
- }
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret == 0) {
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ goto unlock;
+ }
+ pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
+ gf_locks_mt_pl_inode_t);
+ if (!pl_inode) {
+ goto unlock;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "Allocating new pl inode");
+ gf_log (this->name, GF_LOG_TRACE,
+ "Allocating new pl inode");
- pthread_mutex_init (&pl_inode->mutex, NULL);
+ pthread_mutex_init (&pl_inode->mutex, NULL);
- INIT_LIST_HEAD (&pl_inode->dom_list);
- INIT_LIST_HEAD (&pl_inode->ext_list);
- INIT_LIST_HEAD (&pl_inode->rw_list);
- INIT_LIST_HEAD (&pl_inode->reservelk_list);
- INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
- INIT_LIST_HEAD (&pl_inode->blocked_calls);
+ INIT_LIST_HEAD (&pl_inode->dom_list);
+ INIT_LIST_HEAD (&pl_inode->ext_list);
+ INIT_LIST_HEAD (&pl_inode->rw_list);
+ INIT_LIST_HEAD (&pl_inode->reservelk_list);
+ INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
+ INIT_LIST_HEAD (&pl_inode->blocked_calls);
- inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ }
+unlock:
+ UNLOCK (&inode->lock);
-out:
return pl_inode;
}
/* Create a new posix_lock_t */
posix_lock_t *
-new_posix_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, fd_t *fd)
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd)
{
posix_lock_t *lock = NULL;
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, flock, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, transport, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, fd, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", flock, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", client, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fd, out);
lock = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
@@ -485,11 +486,11 @@ new_posix_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
else
lock->fl_end = flock->l_start + flock->l_len - 1;
- lock->transport = transport;
+ lock->client = client;
lock->fd_num = fd_to_fdnum (fd);
lock->fd = fd;
lock->client_pid = client_pid;
- lock->owner = owner;
+ lock->owner = *owner;
INIT_LIST_HEAD (&lock->list);
@@ -533,6 +534,11 @@ posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock)
static void
__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
{
+ if (lock->blocked)
+ gettimeofday (&lock->blkd_time, NULL);
+ else
+ gettimeofday (&lock->granted_time, NULL);
+
list_add_tail (&lock->list, &pl_inode->ext_list);
return;
@@ -559,8 +565,8 @@ int
same_owner (posix_lock_t *l1, posix_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
- (l1->transport == l2->transport));
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
@@ -689,7 +695,7 @@ subtract_locks (posix_lock_t *big, posix_lock_t *small)
}
GF_ASSERT (0);
- gf_log (POSIX_LOCKS, GF_LOG_ERROR, "Unexpected case in subtract_locks");
+ gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks");
out:
if (v.locks[0]) {
@@ -709,6 +715,36 @@ done:
return v;
}
+/*
+ * Scan pl_inode->ext_list under the inode mutex and return the first
+ * non-blocked lock that overlaps 'lock', belongs to a different owner, and
+ * conflicts with it (at least one of the two is a write lock).
+ * Returns NULL when there is no such lock.
+ */
+static posix_lock_t *
+first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+        posix_lock_t *cur = NULL;
+        posix_lock_t *hit = NULL;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                list_for_each_entry (cur, &pl_inode->ext_list, list) {
+                        /* not a candidate: blocked, disjoint, or our own */
+                        if (cur->blocked ||
+                            !locks_overlap (cur, lock) ||
+                            same_owner (cur, lock))
+                                continue;
+
+                        /* two read locks never conflict */
+                        if ((cur->fl_type != F_WRLCK) &&
+                            (lock->fl_type != F_WRLCK))
+                                continue;
+
+                        hit = cur;
+                        break;
+                }
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        return hit;
+}
+
/*
Start searching from {begin}, and return the first lock that
conflicts, NULL if no conflict
@@ -767,6 +803,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
struct _values v = { .locks = {0, 0, 0} };
list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) {
+ if (conf->blocked)
+ continue;
if (!locks_overlap (conf, lock))
continue;
@@ -775,7 +813,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
sum = add_locks (lock, conf);
sum->fl_type = lock->fl_type;
- sum->transport = lock->transport;
+ sum->client = lock->client;
sum->fd_num = lock->fd_num;
sum->client_pid = lock->client_pid;
sum->owner = lock->owner;
@@ -784,6 +822,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
__destroy_lock (conf);
__destroy_lock (lock);
+ INIT_LIST_HEAD (&sum->list);
+ posix_lock_to_flock (sum, &sum->user_flock);
__insert_and_merge (pl_inode, sum);
return;
@@ -791,7 +831,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
sum = add_locks (lock, conf);
sum->fl_type = conf->fl_type;
- sum->transport = conf->transport;
+ sum->client = conf->client;
sum->fd_num = conf->fd_num;
sum->client_pid = conf->client_pid;
sum->owner = conf->owner;
@@ -811,6 +851,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
continue;
INIT_LIST_HEAD (&v.locks[i]->list);
+ posix_lock_to_flock (v.locks[i],
+ &v.locks[i]->user_flock);
__insert_and_merge (pl_inode,
v.locks[i]);
}
@@ -879,10 +921,9 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *g
posix_lock_to_flock (l, &conf->user_flock);
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Granted",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted",
l->fl_type == F_UNLCK ? "Unlock" : "Lock",
- l->client_pid,
- l->owner,
+ l->client_pid, lkowner_utoa (&l->owner),
l->user_flock.l_start,
l->user_flock.l_len);
@@ -918,7 +959,8 @@ grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode)
pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
&lock->user_flock, 0, 0, NULL);
- STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
GF_FREE (lock);
}
@@ -947,8 +989,8 @@ pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
flock.l_len = old_lock->user_flock.l_len;
- unlock_lock = new_posix_lock (&flock, old_lock->transport,
- old_lock->client_pid, old_lock->owner,
+ unlock_lock = new_posix_lock (&flock, old_lock->client,
+ old_lock->client_pid, &old_lock->owner,
old_lock->fd);
GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out);
ret = 0;
@@ -963,7 +1005,8 @@ pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
&lock->user_flock, 0, 0, NULL);
- STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
GF_FREE (lock);
}
@@ -1001,19 +1044,19 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
if (__is_lock_grantable (pl_inode, lock)) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => OK",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
__insert_and_merge (pl_inode, lock);
} else if (can_block) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
lock->blocked = 1;
@@ -1021,10 +1064,10 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
ret = -1;
} else {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => NOK",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
errno = EAGAIN;
@@ -1046,7 +1089,7 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
{
posix_lock_t *conf = NULL;
- conf = first_overlap (pl_inode, lock);
+ conf = first_conflicting_overlap (pl_inode, lock);
if (conf == NULL) {
lock->fl_type = F_UNLCK;
@@ -1055,3 +1098,124 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
return conf;
}
+
+
+/*
+ * Allocate a zeroed lock table and initialise its two locker lists and its
+ * lock. Returns NULL if the allocation fails.
+ */
+struct _lock_table *
+pl_lock_table_new (void)
+{
+        struct _lock_table *table = NULL;
+
+        table = GF_CALLOC (1, sizeof (struct _lock_table),
+                           gf_common_mt_lock_table);
+        if (table != NULL) {
+                INIT_LIST_HEAD (&table->entrylk_lockers);
+                INIT_LIST_HEAD (&table->inodelk_lockers);
+                LOCK_INIT (&table->lock);
+        }
+
+        return table;
+}
+
+
+/*
+ * Record a locker in 'table'.
+ *
+ * A new entry is created holding a copy of the volume name, the pid, a
+ * copy of *owner, and either a reference on fd (when fd is non-NULL) or a
+ * copy of loc. It is appended to the entrylk list when type is
+ * GF_FOP_ENTRYLK, otherwise to the inodelk list.
+ *
+ * Returns 0 on success, -1 on invalid arguments or allocation failure.
+ */
+int
+pl_add_locker (struct _lock_table *table, const char *volume,
+               loc_t *loc, fd_t *fd, pid_t pid, gf_lkowner_t *owner,
+               glusterfs_fop_t type)
+{
+        int32_t         ret = -1;
+        struct _locker *new = NULL;
+
+        GF_VALIDATE_OR_GOTO ("lock-table", table, out);
+        GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
+        GF_VALIDATE_OR_GOTO ("lock-table", owner, out);
+
+        new = GF_CALLOC (1, sizeof (struct _locker), gf_common_mt_locker);
+        if (new == NULL) {
+                goto out;
+        }
+        INIT_LIST_HEAD (&new->lockers);
+
+        new->volume = gf_strdup (volume);
+        if (new->volume == NULL) {
+                /* pl_del_locker() strcmp()s this field, so an entry
+                 * without a volume name must never reach the table */
+                GF_FREE (new);
+                goto out;
+        }
+
+        if (fd == NULL) {
+                loc_copy (&new->loc, loc);
+        } else {
+                new->fd = fd_ref (fd);
+        }
+
+        new->pid   = pid;
+        new->owner = *owner;
+
+        LOCK (&table->lock);
+        {
+                if (type == GF_FOP_ENTRYLK)
+                        list_add_tail (&new->lockers, &table->entrylk_lockers);
+                else
+                        list_add_tail (&new->lockers, &table->inodelk_lockers);
+        }
+        UNLOCK (&table->lock);
+
+        /* previously never set, so success was reported as -1 */
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Remove matching lockers from 'table' and release what they hold.
+ *
+ * The list is chosen by 'type' (entrylk list for GF_FOP_ENTRYLK, inodelk
+ * list otherwise). An entry matches when its owner and volume equal the
+ * given ones and either both the entry and the caller have an fd, or the
+ * entry's loc.inode equals loc->inode. Matches are moved to a private list
+ * under table->lock and released after unlocking.
+ *
+ * Returns 0 on success, -1 when table or volume is NULL.
+ */
+int
+pl_del_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc, fd_t *fd, gf_lkowner_t *owner, glusterfs_fop_t type)
+{
+ struct _locker *locker = NULL;
+ struct _locker *tmp = NULL;
+ int32_t ret = -1;
+ struct list_head *head = NULL;
+ struct list_head del;
+
+ GF_VALIDATE_OR_GOTO ("lock-table", table, out);
+ GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
+
+ INIT_LIST_HEAD (&del);
+
+ LOCK (&table->lock);
+ {
+ if (type == GF_FOP_ENTRYLK) {
+ head = &table->entrylk_lockers;
+ } else {
+ head = &table->inodelk_lockers;
+ }
+
+ list_for_each_entry_safe (locker, tmp, head, lockers) {
+ if (!is_same_lkowner (&locker->owner, owner) ||
+ strcmp (locker->volume, volume))
+ continue;
+
+ /*
+ * It is possible for inodelk lock to come on anon-fd
+ * and inodelk unlock to come on normal fd in case of
+ * client re-opens. So don't check for fds to be equal.
+ */
+ if (locker->fd && fd)
+ list_move_tail (&locker->lockers, &del);
+ else if (locker->loc.inode && loc &&
+ (locker->loc.inode == loc->inode))
+ list_move_tail (&locker->lockers, &del);
+ }
+ }
+ UNLOCK (&table->lock);
+
+ tmp = NULL;
+ locker = NULL;
+
+ /* 'del' is private to this call, so no lock is held while releasing */
+ list_for_each_entry_safe (locker, tmp, &del, lockers) {
+ list_del_init (&locker->lockers);
+ if (locker->fd)
+ fd_unref (locker->fd);
+ else
+ loc_wipe (&locker->loc);
+
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
+ }
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index dddcd339b..db19ec978 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -1,28 +1,55 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __COMMON_H__
#define __COMMON_H__
+#include "lkowner.h"
+/*dump locks format strings */
+#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
+#define ENTRY_FMT "type=%s on basename=%s"
+#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p"
+#define GRNTD_AT "granted at %s"
+#define BLKD_AT "blocked at %s"
+#define CONN_ID "connection-id=%s"
+#define DUMP_BLKD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT
+#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "GRNTD_AT
+#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT", "GRNTD_AT
+
+#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT
+#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT
+#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT
+
+#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT
+#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT
+#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT
+
+/* Copy the client pid of 'lock' into flock->l_pid. Both arguments are
+ * parenthesised so that any pointer expression can be passed safely. */
+#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = (lock)->client_pid)
+
+/* One entry of a lock table: who holds an inodelk/entrylk and on what. */
+struct _locker {
+ struct list_head lockers; /* linkage into one of the table's lists */
+ char *volume; /* volume name the entry was registered with */
+ loc_t loc; /* location of the lock target */
+ fd_t *fd; /* fd of the lock target */
+ gf_lkowner_t owner; /* lock owner */
+ pid_t pid; /* pid of the locker */
+};
+
+/* Table of lockers, kept as separate lists for inodelk and entrylk. */
+struct _lock_table {
+ struct list_head inodelk_lockers; /* list of struct _locker (inodelk) */
+ struct list_head entrylk_lockers; /* list of struct _locker (entrylk) */
+ gf_lock_t lock; /* lock for this table */
+};
+
posix_lock_t *
-new_posix_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, fd_t *fd);
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd);
pl_inode_t *
pl_inode_get (xlator_t *this, inode_t *inode);
@@ -54,13 +81,14 @@ pl_dom_list_t *
get_domain (pl_inode_t *pl_inode, const char *volume);
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom);
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom);
void
__delete_inode_lock (pl_inode_lock_t *lock);
void
-__destroy_inode_lock (pl_inode_lock_t *lock);
+__pl_inodelk_unref (pl_inode_lock_t *lock);
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
@@ -69,9 +97,13 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
void pl_update_refkeeper (xlator_t *this, inode_t *inode);
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode);
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname);
+int32_t
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname);
int32_t
+__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode);
+int32_t
get_entrylk_count (xlator_t *this, inode_t *inode);
void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
@@ -130,4 +162,26 @@ pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
posix_lock_t *lock, int can_block);
int
pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+uint32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename);
+
+int32_t
+pl_add_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc,
+ fd_t *fd,
+ pid_t pid,
+ gf_lkowner_t *owner,
+ glusterfs_fop_t type);
+
+int32_t
+pl_del_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc,
+ fd_t *fd,
+ gf_lkowner_t *owner,
+ glusterfs_fop_t type);
+
+struct _lock_table *
+pl_lock_table_new (void);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index f22ae0d74..0785dc547 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -35,7 +25,8 @@
static pl_entry_lock_t *
new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- void *trans, pid_t client_pid, uint64_t owner, const char *volume)
+ client_t *client, pid_t client_pid, gf_lkowner_t *owner,
+ const char *volume)
{
pl_entry_lock_t *newlock = NULL;
@@ -46,12 +37,12 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
goto out;
}
- newlock->basename = basename ? gf_strdup (basename) : NULL;
- newlock->type = type;
- newlock->trans = trans;
- newlock->volume = volume;
- newlock->client_pid = client_pid;
- newlock->owner = owner;
+ newlock->basename = basename ? gf_strdup (basename) : NULL;
+ newlock->type = type;
+ newlock->trans = client;
+ newlock->volume = volume;
+ newlock->client_pid = client_pid;
+ newlock->owner = *owner;
INIT_LIST_HEAD (&newlock->domain_list);
INIT_LIST_HEAD (&newlock->blocked_locks);
@@ -81,11 +72,11 @@ names_conflict (const char *n1, const char *n2)
}
-static int
+static inline int
__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
(l1->trans == l2->trans));
}
@@ -314,21 +305,16 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
int
__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
- call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, int nonblock)
+ call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this,
+ int nonblock, char *conn_id)
{
pl_entry_lock_t *lock = NULL;
pl_entry_lock_t *conf = NULL;
- void *trans = NULL;
- pid_t client_pid = 0;
- uint64_t owner = 0;
-
- int ret = -EINVAL;
-
- trans = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
+ int ret = -EINVAL;
- lock = new_entrylk_lock (pinode, basename, type, trans, client_pid, owner, dom->domain);
+ lock = new_entrylk_lock (pinode, basename, type,
+ frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, dom->domain);
if (!lock) {
ret = -ENOMEM;
goto out;
@@ -336,19 +322,24 @@ __lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
lock->frame = frame;
lock->this = this;
- lock->trans = trans;
+ lock->trans = frame->root->client;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup (conn_id);
+ }
conf = __lock_grantable (dom, basename, type);
if (conf) {
ret = -EAGAIN;
if (nonblock){
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *)lock->basename);
GF_FREE (lock);
goto out;
}
+ gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
gf_log (this->name, GF_LOG_TRACE,
@@ -361,8 +352,8 @@ __lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) {
ret = -EAGAIN;
if (nonblock) {
- if (lock->basename)
- GF_FREE ((char *) lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *) lock->basename);
GF_FREE (lock);
goto out;
@@ -370,6 +361,7 @@ __lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
lock->frame = frame;
lock->this = this;
+ gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
gf_log (this->name, GF_LOG_TRACE,
@@ -384,6 +376,7 @@ __lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
switch (type) {
case ENTRYLK_WRLCK:
+ gettimeofday (&lock->granted_time, NULL);
list_add_tail (&lock->domain_list, &dom->entrylk_list);
break;
@@ -439,6 +432,32 @@ out:
return ret_lock;
}
+uint32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename)
+{
+ uint32_t entrylk = 0;
+ pl_inode_t *pinode = 0;
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *conf = NULL;
+
+ pinode = pl_inode_get (this, parent);
+ if (!pinode)
+ goto out;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ list_for_each_entry (dom, &pinode->dom_list, inode_list) {
+ conf = __lock_grantable (dom, basename, ENTRYLK_WRLCK);
+ if (conf && conf->basename) {
+ entrylk = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ return entrylk;
+}
void
__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
@@ -464,15 +483,16 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
pl_inode, bl->basename);
bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
- bl->frame, dom, bl->this, 0);
+ bl->frame, dom, bl->this, 0,
+ bl->connection_id);
if (bl_ret == 0) {
list_add (&bl->blocked_locks, granted);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"should never happen");
- if (bl->basename)
- GF_FREE ((char *)bl->basename);
+ GF_FREE (bl->connection_id);
+ GF_FREE ((char *)bl->basename);
GF_FREE (bl);
}
}
@@ -492,7 +512,8 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_lock (&pl_inode->mutex);
{
- __grant_blocked_entry_locks (this, pl_inode, dom, &granted_list);
+ __grant_blocked_entry_locks (this, pl_inode, dom,
+ &granted_list);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -503,24 +524,28 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
lock->basename, ENTRYLK_LOCK, lock->type,
0, 0);
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0);
+ STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock);
}
GF_FREE ((char *)unlocked->basename);
+ GF_FREE (unlocked->connection_id);
GF_FREE (unlocked);
return;
}
/**
- * release_entry_locks_for_transport: release all entry locks from this
- * transport for this loc_t
+ * release_entry_locks_for_client: release all entry locks from this
+ * client for this loc_t
*/
static int
-release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
- pl_dom_list_t *dom, void *trans)
+release_entry_locks_for_client (xlator_t *this, pl_inode_t *pinode,
+ pl_dom_list_t *dom, client_t *client)
{
pl_entry_lock_t *lock = NULL;
pl_entry_lock_t *tmp = NULL;
@@ -534,14 +559,14 @@ release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
{
list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks,
blocked_locks) {
- if (lock->trans != trans)
+ if (lock->trans != client)
continue;
list_del_init (&lock->blocked_locks);
gf_log (this->name, GF_LOG_TRACE,
"releasing lock on held by "
- "{transport=%p}",trans);
+ "{client=%p}", client);
list_add (&lock->blocked_locks, &released);
@@ -549,16 +574,17 @@ release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
list_for_each_entry_safe (lock, tmp, &dom->entrylk_list,
domain_list) {
- if (lock->trans != trans)
+ if (lock->trans != client)
continue;
list_del_init (&lock->domain_list);
gf_log (this->name, GF_LOG_TRACE,
"releasing lock on held by "
- "{transport=%p}",trans);
+ "{client=%p}", client);
GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
GF_FREE (lock);
}
@@ -571,10 +597,10 @@ release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
list_for_each_entry_safe (lock, tmp, &released, blocked_locks) {
list_del_init (&lock->blocked_locks);
- STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN);
+ STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN, NULL);
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
GF_FREE (lock);
}
@@ -582,10 +608,10 @@ release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
list_del_init (&lock->blocked_locks);
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0);
+ STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
GF_FREE (lock);
}
@@ -596,21 +622,23 @@ release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
int
pl_common_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, inode_t *inode, const char *basename,
- entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd)
-{
- uint64_t owner = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- void * transport = NULL;
- pid_t pid = -1;
+ entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
- pl_inode_t * pinode = NULL;
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
int ret = -1;
- pl_entry_lock_t *unlocked = NULL;
char unwind = 1;
+ GF_UNUSED int dict_ret = -1;
+ pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *unlocked = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
- pl_dom_list_t *dom = NULL;
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
pinode = pl_inode_get (this, inode);
if (!pinode) {
@@ -626,20 +654,17 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type);
- pid = frame->root->pid;
- owner = frame->root->lk_owner;
- transport = frame->root->trans;
-
- if (owner == 0) {
+ if (frame->root->lk_owner.len == 0) {
/*
this is a special case that means release
- all locks from this transport
+ all locks from this client
*/
gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for transport %p", transport);
+ "Releasing locks for client %p", frame->root->client);
- release_entry_locks_for_transport (this, pinode, dom, transport);
+ release_entry_locks_for_client (this, pinode, dom,
+ frame->root->client);
op_ret = 0;
goto out;
@@ -650,7 +675,7 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
pthread_mutex_lock (&pinode->mutex);
{
ret = __lock_name (pinode, basename, type,
- frame, dom, this, 0);
+ frame, dom, this, 0, conn_id);
}
pthread_mutex_unlock (&pinode->mutex);
@@ -675,7 +700,7 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
pthread_mutex_lock (&pinode->mutex);
{
ret = __lock_name (pinode, basename, type,
- frame, dom, this, 1);
+ frame, dom, this, 1, conn_id);
}
pthread_mutex_unlock (&pinode->mutex);
@@ -712,7 +737,25 @@ out:
entrylk_trace_out (this, frame, volume, fd, loc, basename,
cmd, type, op_ret, op_errno);
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ ctx = pl_ctx_get (frame->root->client, this);
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+
+ if (cmd == ENTRYLK_UNLOCK)
+ pl_del_locker (ctx->ltable, volume, loc, fd,
+ &frame->root->lk_owner,
+ GF_FOP_ENTRYLK);
+ else
+ pl_add_locker (ctx->ltable, volume, loc, fd,
+ frame->root->pid,
+ &frame->root->lk_owner,
+ GF_FOP_ENTRYLK);
+
+unwind:
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL);
} else {
entrylk_trace_block (this, frame, volume, fd, loc, basename,
cmd, type);
@@ -731,10 +774,10 @@ out:
int
pl_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
-
- pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, type, loc, NULL);
+ pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd,
+ type, loc, NULL, xdata);
return 0;
}
@@ -749,16 +792,16 @@ pl_entrylk (call_frame_t *frame, xlator_t *this,
int
pl_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
-
- pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, type, NULL, fd);
+ pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd,
+ type, NULL, fd, xdata);
return 0;
}
-static int32_t
+int32_t
__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
{
int32_t count = 0;
@@ -767,24 +810,10 @@ __get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s on %s state = Active",
- dom->domain,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
count++;
}
list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s on %s state = Blocked",
- dom->domain,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
count++;
}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 717754390..508523e11 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -33,20 +23,30 @@
#include "locks.h"
#include "common.h"
-void
+inline void
__delete_inode_lock (pl_inode_lock_t *lock)
{
list_del (&lock->list);
}
-void
-__destroy_inode_lock (pl_inode_lock_t *lock)
+static inline void
+__pl_inodelk_ref (pl_inode_lock_t *lock)
{
- GF_FREE (lock);
+ lock->ref++;
+}
+
+inline void
+__pl_inodelk_unref (pl_inode_lock_t *lock)
+{
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
}
/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */
-static int
+static inline int
inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
@@ -120,10 +120,11 @@ inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
}
/* Returns true if the 2 inodelks have the same owner */
-static int same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+static inline int
+same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
- (l1->transport == l2->transport));
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
/* Returns true if the 2 inodelks conflict with each other */
@@ -143,7 +144,8 @@ __inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
if (list_empty (&dom->inodelk_list))
goto out;
list_for_each_entry (l, &dom->inodelk_list, list){
- if (inodelk_conflict (lock, l)) {
+ if (inodelk_conflict (lock, l) &&
+ !same_inodelk_owner (lock, l)) {
ret = l;
goto out;
}
@@ -207,13 +209,14 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
if (can_block == 0)
goto out;
+ gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
@@ -226,21 +229,24 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
if (can_block == 0)
goto out;
+ gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
gf_log (this->name, GF_LOG_TRACE,
"Lock is grantable, but blocking to prevent starvation");
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
goto out;
}
+ __pl_inodelk_ref (lock);
+ gettimeofday (&lock->granted_time, NULL);
list_add (&lock->list, &dom->inodelk_list);
ret = 0;
@@ -284,20 +290,22 @@ __inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
conf = find_matching_inodelk (lock, dom);
if (!conf) {
- gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock not found for unlock");
+ gf_log (this->name, GF_LOG_ERROR,
+ " Matching lock not found for unlock %llu-%llu, by %s "
+ "on %p", (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end,
+ lkowner_utoa (&lock->owner), lock->client);
goto out;
}
__delete_inode_lock (conf);
gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock");
- __destroy_inode_lock (lock);
-
+ " Matching lock found for unlock %llu-%llu, by %s on %p",
+ (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner),
+ lock->client);
out:
return conf;
-
-
}
static void
__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
@@ -327,7 +335,8 @@ __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom)
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom)
{
struct list_head granted;
pl_inode_lock_t *lock;
@@ -335,11 +344,6 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *
INIT_LIST_HEAD (&granted);
- if (list_empty (&dom->blocked_inodelks)) {
- gf_log (this->name, GF_LOG_TRACE,
- "No blocked locks to be granted for domain: %s", dom->domain);
- }
-
pthread_mutex_lock (&pl_inode->mutex);
{
__grant_blocked_inode_locks (this, pl_inode, &granted, dom);
@@ -348,37 +352,44 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *
list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
&lock->user_flock, 0, 0, lock->volume);
- STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0);
+ STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
}
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+ __pl_inodelk_unref (lock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
}
-/* Release all inodelks from this transport */
+/* Release all inodelks from this client */
static int
-release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom,
- inode_t *inode, void *trans)
+release_inode_locks_of_client (xlator_t *this, pl_dom_list_t *dom,
+ inode_t *inode, client_t *client)
{
pl_inode_lock_t *tmp = NULL;
pl_inode_lock_t *l = NULL;
pl_inode_t * pinode = NULL;
- struct list_head granted;
struct list_head released;
char *path = NULL;
+ char *file = NULL;
- INIT_LIST_HEAD (&granted);
INIT_LIST_HEAD (&released);
pinode = pl_inode_get (this, inode);
@@ -387,63 +398,65 @@ release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom,
{
list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) {
- if (l->transport != trans)
+ if (l->client != client)
continue;
list_del_init (&l->blocked_locks);
- if (inode_path (inode, NULL, &path) < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "inode_path failed");
- goto unlock;
- }
+ inode_path (inode, NULL, &path);
+ if (path)
+ file = path;
+ else
+ file = uuid_utoa (inode->gfid);
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}",
- path, trans,
- (uint64_t) l->client_pid,
- l->owner);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "releasing blocking lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ file, client, (uint64_t) l->client_pid,
+ lkowner_utoa (&l->owner));
list_add (&l->blocked_locks, &released);
-
+ if (path) {
+ GF_FREE (path);
+ path = NULL;
+ }
}
list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) {
- if (l->transport != trans)
+ if (l->client != client)
continue;
- __delete_inode_lock (l);
- __destroy_inode_lock (l);
-
+ inode_path (inode, NULL, &path);
+ if (path)
+ file = path;
+ else
+ file = uuid_utoa (inode->gfid);
- if (inode_path (inode, NULL, &path) < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "inode_path failed");
- goto unlock;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "releasing granted lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ file, client, (uint64_t) l->client_pid,
+ lkowner_utoa (&l->owner));
+
+ if (path) {
+ GF_FREE (path);
+ path = NULL;
}
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}",
- path, trans,
- (uint64_t) l->client_pid,
- l->owner);
-
-
+ __delete_inode_lock (l);
+ __pl_inodelk_unref (l);
}
}
-unlock:
- if (path)
- GF_FREE (path);
+ GF_FREE (path);
pthread_mutex_unlock (&pinode->mutex);
list_for_each_entry_safe (l, tmp, &released, blocked_locks) {
list_del_init (&l->blocked_locks);
- STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN);
- GF_FREE (l);
+ STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (l);
}
grant_blocked_inode_locks (this, pinode, dom);
@@ -457,47 +470,47 @@ pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
{
int ret = -EINVAL;
pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
pthread_mutex_lock (&pl_inode->mutex);
{
if (lock->fl_type != F_UNLCK) {
ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
- if (ret == 0)
+ if (ret == 0) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->fl_start,
lock->fl_end);
-
- if (ret == -EAGAIN)
+ } else if (ret == -EAGAIN) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
+ if (can_block)
+ unref = _gf_false;
+ }
+ } else {
+ retlock = __inode_unlock_lock (this, lock, dom);
+ if (!retlock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+ __pl_inodelk_unref (retlock);
- goto out;
- }
-
-
- retlock = __inode_unlock_lock (this, lock, dom);
- if (!retlock) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Bad Unlock issued on Inode lock");
- ret = -EINVAL;
- goto out;
+ ret = 0;
}
- __destroy_inode_lock (retlock);
-
- ret = 0;
-
-
}
out:
+ if (unref)
+ __pl_inodelk_unref (lock);
pthread_mutex_unlock (&pl_inode->mutex);
grant_blocked_inode_locks (this, pl_inode, dom);
return ret;
@@ -505,8 +518,9 @@ out:
/* Create a new inode_lock_t */
pl_inode_lock_t *
-new_inode_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, const char *volume)
+new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ call_frame_t *frame, xlator_t *this, const char *volume,
+ char *conn_id)
{
pl_inode_lock_t *lock = NULL;
@@ -525,33 +539,76 @@ new_inode_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
else
lock->fl_end = flock->l_start + flock->l_len - 1;
- lock->transport = transport;
+ lock->client = client;
lock->client_pid = client_pid;
- lock->owner = owner;
lock->volume = volume;
+ lock->owner = frame->root->lk_owner;
+ lock->frame = frame;
+ lock->this = this;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup (conn_id);
+ }
INIT_LIST_HEAD (&lock->list);
INIT_LIST_HEAD (&lock->blocked_locks);
+ __pl_inodelk_ref (lock);
return lock;
}
+int32_t
+_pl_convert_volume (const char *volume, char **res)
+{
+ char *mdata_vol = NULL;
+ int ret = 0;
+
+ mdata_vol = strrchr (volume, ':');
+ //if the volume already ends with :metadata don't bother
+ if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0))
+ return 0;
+
+ ret = gf_asprintf (res, "%s:metadata", volume);
+ if (ret <= 0)
+ return ENOMEM;
+ return 0;
+}
+
+int32_t
+_pl_convert_volume_for_special_range (struct gf_flock *flock,
+ const char *volume, char **res)
+{
+ int32_t ret = 0;
+
+ if ((flock->l_start == LLONG_MAX -1) &&
+ (flock->l_len == 0)) {
+ ret = _pl_convert_volume (volume, res);
+ }
+
+ return ret;
+}
+
/* Common inodelk code called from pl_inodelk and pl_finodelk */
int
pl_common_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, inode_t *inode, int32_t cmd,
- struct gf_flock *flock, loc_t *loc, fd_t *fd)
+ struct gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- void * transport = NULL;
- pid_t client_pid = -1;
- uint64_t owner = -1;
- pl_inode_t * pinode = NULL;
- pl_inode_lock_t * reqlock = NULL;
- pl_dom_list_t * dom = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ GF_UNUSED int dict_ret = -1;
+ int can_block = 0;
+ pl_inode_t * pinode = NULL;
+ pl_inode_lock_t * reqlock = NULL;
+ pl_dom_list_t * dom = NULL;
+ char *res = NULL;
+ char *res1 = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (inode, unwind);
@@ -562,11 +619,13 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
- pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
+ op_errno = _pl_convert_volume_for_special_range (flock, volume, &res);
+ if (op_errno)
+ goto unwind;
+ if (res)
+ volume = res;
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
+ pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
pinode = pl_inode_get (this, inode);
if (!pinode) {
@@ -575,22 +634,34 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
}
dom = get_domain (pinode, volume);
+ if (!dom) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (owner == 0) {
+ if (frame->root->lk_owner.len == 0) {
/*
special case: this means release all locks
- from this transport
+ from this client
*/
gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
-
- release_inode_locks_of_transport (this, dom, inode, transport);
+ "Releasing all locks from client %p", frame->root->client);
+
+ release_inode_locks_of_client (this, dom, inode, frame->root->client);
+ _pl_convert_volume (volume, &res1);
+ if (res1) {
+ dom = get_domain (pinode, res1);
+ if (dom)
+ release_inode_locks_of_client (this, dom,
+ inode, frame->root->client);
+ }
op_ret = 0;
goto unwind;
}
- reqlock = new_inode_lock (flock, transport, client_pid, owner, volume);
+ reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid,
+ frame, this, volume, conn_id);
if (!reqlock) {
op_ret = -1;
@@ -598,14 +669,10 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
- reqlock->frame = frame;
- reqlock->this = this;
switch (cmd) {
case F_SETLKW:
can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
/* fall through */
@@ -615,14 +682,13 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
can_block, dom);
if (ret < 0) {
- if ((can_block) && (F_UNLCK != reqlock->fl_type)) {
+ if ((can_block) && (F_UNLCK != flock->l_type)) {
pl_trace_block (this, frame, fd, loc,
cmd, flock, volume);
goto out;
}
gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
op_errno = -ret;
- __destroy_inode_lock (reqlock);
goto unwind;
}
break;
@@ -638,86 +704,103 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
op_ret = 0;
+ ctx = pl_ctx_get (frame->root->client, this);
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+
+ if (flock->l_type == F_UNLCK)
+ pl_del_locker (ctx->ltable, volume, loc, fd,
+ &frame->root->lk_owner,
+ GF_FOP_INODELK);
+ else
+ pl_add_locker (ctx->ltable, volume, loc, fd,
+ frame->root->pid,
+ &frame->root->lk_owner,
+ GF_FOP_INODELK);
+
unwind:
if ((inode != NULL) && (flock !=NULL)) {
pl_update_refkeeper (this, inode);
pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
}
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
out:
+ GF_FREE (res);
+ GF_FREE (res1);
return 0;
}
int
pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
-
- pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, loc, NULL);
+ pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock,
+ loc, NULL, xdata);
return 0;
}
int
pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
-
- pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, NULL, fd);
+ pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock,
+ NULL, fd, xdata);
return 0;
}
+static inline int32_t
+__get_inodelk_dom_count (pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+ count++;
+ }
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ count++;
+ }
+ return count;
+}
-static int32_t
-__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode)
+/* Returns the no. of locks (blocked/granted) held on a given domain name
+ * If @domname is NULL, returns the no. of locks in all the domains present.
+ * If @domname is non-NULL and non-existent, returns 0 */
+int32_t
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname)
{
int32_t count = 0;
- pl_inode_lock_t *lock = NULL;
pl_dom_list_t *dom = NULL;
list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- list_for_each_entry (lock, &dom->inodelk_list, list) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" "
- "state = Active",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- count++;
- }
+ if (domname) {
+ if (strcmp (domname, dom->domain) == 0) {
+ count = __get_inodelk_dom_count (dom);
+ goto out;
+ }
- list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ } else {
+ /* Counting locks from all domains */
+ count += __get_inodelk_dom_count (dom);
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" "
- "state = Blocked",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- count++;
}
-
}
+out:
return count;
}
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode)
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname)
{
pl_inode_t *pl_inode = NULL;
uint64_t tmp_pl_inode = 0;
@@ -733,7 +816,7 @@ get_inodelk_count (xlator_t *this, inode_t *inode)
pthread_mutex_lock (&pl_inode->mutex);
{
- count = __get_inodelk_count (this, pl_inode);
+ count = __get_inodelk_count (this, pl_inode, domname);
}
pthread_mutex_unlock (&pl_inode->mutex);
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
index 9d44f0eba..08aeb0a79 100644
--- a/xlators/features/locks/src/locks-mem-types.h
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __LOCKS_MEM_TYPES_H__
#define __LOCKS_MEM_TYPES_H__
@@ -32,7 +22,6 @@ enum gf_locks_mem_types_ {
gf_locks_mt_truncate_ops,
gf_locks_mt_pl_rw_req_t,
gf_locks_mt_posix_locks_private_t,
- gf_locks_mt_pl_local_t,
gf_locks_mt_pl_fdctx_t,
gf_locks_mt_end
};
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index ab70064a9..76fc941d7 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
@@ -29,8 +19,10 @@
#include "stack.h"
#include "call-stub.h"
#include "locks-mem-types.h"
+#include "client_t.h"
+
+#include "lkowner.h"
-#define POSIX_LOCKS "posix-locks"
struct __pl_fd;
struct __posix_lock {
@@ -41,25 +33,29 @@ struct __posix_lock {
off_t fl_end;
short blocked; /* waiting to acquire */
- struct gf_flock user_flock; /* the flock supplied by the user */
+ struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
unsigned long fd_num;
fd_t *fd;
call_frame_t *frame;
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
/* These two together serve to uniquely identify each process
across nodes */
- void *transport; /* to identify client node */
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
pid_t client_pid; /* pid of client process */
- uint64_t owner; /* lock owner from fuse */
};
typedef struct __posix_lock posix_lock_t;
struct __pl_inode_lock {
struct list_head list;
struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
+ int ref;
short fl_type;
off_t fl_start;
@@ -67,18 +63,23 @@ struct __pl_inode_lock {
const char *volume;
- struct gf_flock user_flock; /* the flock supplied by the user */
+ struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
fd_t *fd;
call_frame_t *frame;
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
/* These two together serve to uniquely identify each process
across nodes */
- void *transport; /* to identify client node */
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
pid_t client_pid; /* pid of client process */
- uint64_t owner;
+
+ char *connection_id; /* stores the client connection id */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -111,9 +112,14 @@ struct __entry_lock {
const char *basename;
entrylk_type type;
- void *trans;
- pid_t client_pid; /* pid of client process */
- uint64_t owner;
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
+ void *trans;
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ char *connection_id; /* stores the client connection id */
};
typedef struct __entry_lock pl_entry_lock_t;
@@ -147,16 +153,40 @@ typedef struct __pl_fd pl_fd_t;
typedef struct {
gf_boolean_t mandatory; /* if mandatory locking is enabled */
gf_boolean_t trace; /* trace lock requests in and out */
+ char *brickname;
} posix_locks_private_t;
+
typedef struct {
gf_boolean_t entrylk_count_req;
gf_boolean_t inodelk_count_req;
+ gf_boolean_t inodelk_dom_count_req;
gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+
+ /* used by {f,}truncate */
+ loc_t loc;
+ fd_t *fd;
+ off_t offset;
+ dict_t *xdata;
+ enum {TRUNCATE, FTRUNCATE} op;
} pl_local_t;
+
typedef struct {
struct list_head locks_list;
} pl_fdctx_t;
+
+typedef struct _locks_ctx {
+ gf_lock_t ltable_lock; /* only for replace,
+ ltable has its own internal
+ lock for operations */
+ struct _lock_table *ltable;
+} pl_ctx_t;
+
+
+pl_ctx_t *
+pl_ctx_get (client_t *client, xlator_t *xlator);
+
#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index e1ec8db02..7bfb38a51 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
@@ -37,6 +27,9 @@
#include "locks.h"
#include "common.h"
#include "statedump.h"
+#include "clear.h"
+#include "defaults.h"
+#include "syncop.h"
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -47,13 +40,9 @@
void do_blocked_rw (pl_inode_t *);
static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
-
-struct _truncate_ops {
- loc_t loc;
- fd_t *fd;
- off_t offset;
- enum {TRUNCATE, FTRUNCATE} op;
-};
+static int format_brickname(char *);
+int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *);
+static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
static pl_fdctx_t *
pl_new_fdctx ()
@@ -62,7 +51,7 @@ pl_new_fdctx ()
fdctx = GF_CALLOC (1, sizeof (*fdctx),
gf_locks_mt_pl_fdctx_t);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, fdctx, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fdctx, out);
INIT_LIST_HEAD (&fdctx->locks_list);
@@ -77,7 +66,7 @@ pl_check_n_create_fdctx (xlator_t *this, fd_t *fd)
uint64_t tmp = 0;
pl_fdctx_t *fdctx = NULL;
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, this, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", this, out);
GF_VALIDATE_OR_GOTO (this->name, fd, out);
LOCK (&fd->lock);
@@ -108,25 +97,30 @@ out:
int
pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
local = frame->local;
if (local->op == TRUNCATE)
loc_wipe (&local->loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
+
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
static int
truncate_allowed (pl_inode_t *pl_inode,
- void *transport, pid_t client_pid,
- uint64_t owner, off_t offset)
+ client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, off_t offset)
{
posix_lock_t *l = NULL;
posix_lock_t region = {.list = {0, }, };
@@ -134,9 +128,9 @@ truncate_allowed (pl_inode_t *pl_inode,
region.fl_start = offset;
region.fl_end = LLONG_MAX;
- region.transport = transport;
+ region.client = client;
region.client_pid = client_pid;
- region.owner = owner;
+ region.owner = *owner;
pthread_mutex_lock (&pl_inode->mutex);
{
@@ -145,7 +139,7 @@ truncate_allowed (pl_inode_t *pl_inode,
&& locks_overlap (&region, l)
&& !same_owner (&region, l)) {
ret = 0;
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Truncate "
+ gf_log ("posix-locks", GF_LOG_TRACE, "Truncate "
"allowed");
break;
}
@@ -159,10 +153,11 @@ truncate_allowed (pl_inode_t *pl_inode,
static int
truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
posix_locks_private_t *priv = NULL;
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
inode_t *inode = NULL;
pl_inode_t *pl_inode = NULL;
@@ -191,8 +186,8 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (priv->mandatory
&& pl_inode->mandatory
- && !truncate_allowed (pl_inode, frame->root->trans,
- frame->root->pid, frame->root->lk_owner,
+ && !truncate_allowed (pl_inode, frame->root->client,
+ frame->root->pid, &frame->root->lk_owner,
local->offset)) {
op_ret = -1;
op_errno = EAGAIN;
@@ -203,12 +198,12 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
case TRUNCATE:
STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset);
+ &local->loc, local->offset, local->xdata);
break;
case FTRUNCATE:
STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset);
+ local->fd, local->offset, local->xdata);
break;
}
@@ -219,37 +214,42 @@ unwind:
"error: %s", op_ret, strerror (op_errno));
if (local->op == TRUNCATE)
loc_wipe (&local->loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata);
return 0;
}
int
pl_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = GF_CALLOC (1, sizeof (struct _truncate_ops),
- gf_locks_mt_truncate_ops);
+ local = mem_get0 (this->local_pool);
GF_VALIDATE_OR_GOTO (this->name, local, unwind);
local->op = TRUNCATE;
local->offset = offset;
loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
frame->local = local;
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->stat, loc);
+ FIRST_CHILD (this)->fops->stat, loc, NULL);
return 0;
unwind:
gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, "
"error: %s", loc->path, -1, strerror (ENOMEM));
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -257,32 +257,54 @@ unwind:
int
pl_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = GF_CALLOC (1, sizeof (struct _truncate_ops),
- gf_locks_mt_truncate_ops);
+ local = mem_get0 (this->local_pool);
GF_VALIDATE_OR_GOTO (this->name, local, unwind);
local->op = FTRUNCATE;
local->offset = offset;
- local->fd = fd;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
frame->local = local;
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
unwind:
gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, "
"error: %s", -1, strerror (ENOMEM));
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
+int
+pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *l = NULL;
+ int found = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ found = 1;
+ break;
+ }
+ }
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return found;
+}
+
static void
delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
{
@@ -312,7 +334,8 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
list_for_each_entry_safe (l, tmp, &blocked_list, list) {
list_del_init(&l->list);
- STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock);
+ STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock,
+ NULL);
__destroy_lock (l);
}
@@ -324,7 +347,7 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
static void
__delete_locks_of_owner (pl_inode_t *pl_inode,
- void *transport, uint64_t owner)
+ client_t *client, gf_lkowner_t *owner)
{
posix_lock_t *tmp = NULL;
posix_lock_t *l = NULL;
@@ -332,14 +355,16 @@ __delete_locks_of_owner (pl_inode_t *pl_inode,
/* TODO: what if it is a blocked lock with pending l->frame */
list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if ((l->transport == transport)
- && (l->owner == owner)) {
+ if (l->blocked)
+ continue;
+ if ((l->client == client) &&
+ is_same_lkowner (&l->owner, owner)) {
gf_log ("posix-locks", GF_LOG_TRACE,
" Flushing lock"
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" state: %s",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s",
l->fl_type == F_UNLCK ? "Unlock" : "Lock",
l->client_pid,
- l->owner,
+ lkowner_utoa (&l->owner),
l->user_flock.l_start,
l->user_flock.l_len,
l->blocked == 1 ? "Blocked" : "Active");
@@ -352,13 +377,538 @@ __delete_locks_of_owner (pl_inode_t *pl_inode,
return;
}
+
+int32_t
+pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+
+}
+
+int32_t
+pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int op_ret = -1;
+ int32_t bcount = 0;
+ int32_t gcount = 0;
+ char key[PATH_MAX] = {0, };
+ char *lk_summary = NULL;
+ pl_inode_t *pl_inode = NULL;
+ dict_t *dict = NULL;
+ clrlk_args args = {0,};
+ char *brickname = NULL;
+
+ if (!name)
+ goto usual;
+
+ if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD)))
+ goto usual;
+
+ if (clrlk_parse_args (name, &args)) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ pl_inode = pl_inode_get (this, loc->inode);
+ if (!pl_inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ switch (args.type) {
+ case CLRLK_INODE:
+ case CLRLK_ENTRY:
+ op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode,
+ &args, &bcount,
+ &gcount,
+ &op_errno);
+ if (op_ret)
+ goto out;
+ break;
+ case CLRLK_POSIX:
+ op_ret = clrlk_clear_posixlk (this, pl_inode, &args,
+ &bcount, &gcount,
+ &op_errno);
+ if (op_ret)
+ goto out;
+ break;
+ case CLRLK_TYPE_MAX:
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't get brickname");
+ } else {
+ op_ret = format_brickname(brickname);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't format brickname");
+ GF_FREE(brickname);
+ brickname = NULL;
+ }
+ }
+
+ if (!gcount && !bcount) {
+ if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d "
+ "granted locks=%d",
+ (brickname == NULL)? this->name : brickname,
+ (args.type == CLRLK_INODE)? "inode":
+ (args.type == CLRLK_ENTRY)? "entry":
+ (args.type == CLRLK_POSIX)? "posix": " ",
+ bcount, gcount) == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ strncpy (key, name, strlen (name));
+ if (dict_set_dynstr (dict, key, lk_summary)) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ GF_FREE(brickname);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ GF_FREE (args.opts);
+ if (op_ret && lk_summary)
+ GF_FREE (lk_summary);
+ if (dict)
+ dict_unref (dict);
+ return 0;
+
+usual:
+ STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
+
+static int
+format_brickname(char *brickname)
+{
+ int ret = -1;
+ char *hostname = NULL;
+ char *volume = NULL;
+ char *saveptr = NULL;
+
+ if (!brickname)
+ goto out;
+
+ strtok_r(brickname, ":", &saveptr);
+ hostname = gf_strdup(strtok_r(NULL, ":", &saveptr));
+ if (hostname == NULL)
+ goto out;
+ volume = gf_strdup(strtok_r(NULL, ".", &saveptr));
+ if (volume == NULL)
+ goto out;
+
+ sprintf(brickname, "%s:%s", hostname, volume);
+
+ ret = 0;
+out:
+ GF_FREE(hostname);
+ GF_FREE(volume);
+ return ret;
+}
+
+static int
+fetch_pathinfo (xlator_t *this, inode_t *inode, int32_t *op_errno,
+ char **brickname)
+{
+ int ret = -1;
+ loc_t loc = {0, };
+ dict_t *dict = NULL;
+
+ if (!brickname)
+ goto out;
+
+ if (!op_errno)
+ goto out;
+
+ uuid_copy (loc.gfid, inode->gfid);
+ loc.inode = inode_ref (inode);
+
+ ret = syncop_getxattr (FIRST_CHILD(this), &loc, &dict,
+ GF_XATTR_PATHINFO_KEY);
+ if (ret < 0) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, brickname);
+ if (ret)
+ goto out;
+
+ *brickname = gf_strdup(*brickname);
+ if (*brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+
+int
+pl_lockinfo_get_brickname (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ int ret = -1;
+ posix_locks_private_t *priv = NULL;
+ char *brickname = NULL;
+ char *end = NULL;
+ char *tmp = NULL;
+
+ priv = this->private;
+
+ ret = fetch_pathinfo (this, inode, op_errno, &brickname);
+ if (ret)
+ goto out;
+
+ end = strrchr (brickname, ':');
+ if (!end) {
+ GF_FREE(brickname);
+ ret = -1;
+ goto out;
+ }
+
+ tmp = brickname;
+ brickname = gf_strndup (brickname, (end - brickname));
+ if (brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->brickname = brickname;
+ ret = 0;
+out:
+ GF_FREE(tmp);
+ return ret;
+}
+
+char *
+pl_lockinfo_key (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ posix_locks_private_t *priv = NULL;
+ char *key = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (priv->brickname == NULL) {
+ ret = pl_lockinfo_get_brickname (this, inode, op_errno);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot get brickname");
+ goto out;
+ }
+ }
+
+ key = priv->brickname;
+out:
+ return key;
+}
+
+int32_t
+pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t *op_errno)
+{
+ pl_inode_t *pl_inode = NULL;
+ char *key = NULL, *buf = NULL;
+ int32_t op_ret = 0;
+ unsigned long fdnum = 0, len = 0;
+ dict_t *tmp = NULL;
+
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (!pl_inode) {
+ gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
+ *op_errno = EBADFD;
+ op_ret = -1;
+ goto out;
+ }
+
+ if (!pl_locks_by_fd (pl_inode, fd)) {
+ op_ret = 0;
+ goto out;
+ }
+
+ fdnum = fd_to_fdnum (fd);
+
+ key = pl_lockinfo_key (this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ tmp = dict_new ();
+ if (tmp == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_set_uint64 (tmp, key, fdnum);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa (fd->inode->gfid),
+ strerror (*op_errno));
+ goto out;
+ }
+
+ len = dict_serialized_length (tmp);
+ if (len < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialized_length failed (%s) while handling "
+ "lockinfo for fd (ptr:%p inode-gfid:%s)",
+ strerror (*op_errno), fd, uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (buf == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_serialize (tmp, buf);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialize failed (%s) while handling lockinfo "
+ "for fd (ptr: %p inode-gfid:%s)", strerror (*op_errno),
+ fd, uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ op_ret = dict_set_dynptr (dict, GF_XATTR_LOCKINFO_KEY, buf, len);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa (fd->inode->gfid),
+ strerror (*op_errno));
+ goto out;
+ }
+
+ buf = NULL;
+out:
+ if (tmp != NULL) {
+ dict_unref (tmp);
+ }
+
+ if (buf != NULL) {
+ GF_FREE (buf);
+ }
+
+ return op_ret;
+}
+
+
+int32_t
+pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_ret = 0, op_errno = 0;
+ dict_t *dict = NULL;
+
+ if (!name) {
+ goto usual;
+ }
+
+ if (strcmp (name, GF_XATTR_LOCKINFO_KEY) == 0) {
+ dict = dict_new ();
+ if (dict == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = pl_fgetxattr_handle_lockinfo (this, fd, dict,
+ &op_errno);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting lockinfo on fd (ptr:%p inode-gfid:%s) "
+ "failed (%s)", fd, uuid_utoa (fd->inode->gfid),
+ strerror (op_errno));
+ }
+
+ goto unwind;
+ } else {
+ goto usual;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+
+ return 0;
+
+usual:
+ STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+pl_migrate_locks (call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
+ int32_t *op_errno)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t newfd_num = 0;
+ posix_lock_t *l = NULL;
+ int32_t op_ret = 0;
+
+ newfd_num = fd_to_fdnum (newfd);
+
+ pl_inode = pl_inode_get (frame->this, newfd->inode);
+ if (pl_inode == NULL) {
+ op_ret = -1;
+ *op_errno = EBADFD;
+ goto out;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->fd_num == oldfd_num) {
+ l->fd_num = newfd_num;
+ l->client = frame->root->client;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ op_ret = 0;
+out:
+ return op_ret;
+}
+
+int32_t
+pl_fsetxattr_handle_lockinfo (call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
+ int len, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ dict_t *lockinfo = NULL;
+ uint64_t oldfd_num = 0;
+ char *key = NULL;
+
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_unserialize (lockinfo_buf, len, &lockinfo);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ key = pl_lockinfo_key (frame->this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ op_ret = dict_get_uint64 (lockinfo, key, &oldfd_num);
+
+ if (oldfd_num == 0) {
+ op_ret = 0;
+ goto out;
+ }
+
+ op_ret = pl_migrate_locks (frame, fd, oldfd_num, op_errno);
+ if (op_ret < 0) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "migration of locks from oldfd (ptr:%p) to newfd "
+ "(ptr:%p) (inode-gfid:%s)", (void *)oldfd_num, fd,
+ uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+out:
+ dict_unref (lockinfo);
+
+ return op_ret;
+}
+
+int32_t
+pl_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = 0, op_errno = 0;
+ void *lockinfo_buf = NULL;
+ int len = 0;
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ &lockinfo_buf, &len);
+ if (lockinfo_buf == NULL) {
+ goto usual;
+ }
+
+ op_ret = pl_fsetxattr_handle_lockinfo (frame, fd, lockinfo_buf, len,
+ &op_errno);
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+usual:
+ STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
int32_t
pl_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
pl_fdctx_t *fdctx = NULL;
@@ -377,28 +927,28 @@ unwind:
frame,
op_ret,
op_errno,
- fd);
+ fd, xdata);
return 0;
}
int32_t
pl_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
STACK_WIND (frame,
pl_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -406,24 +956,21 @@ pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
pl_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t owner = -1;
-
- owner = frame->root->lk_owner;
+ pl_inode_t *pl_inode = NULL;
pl_inode = pl_inode_get (this, fd->inode);
if (!pl_inode) {
gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
- STACK_UNWIND_STRICT (flush, frame, -1, EBADFD);
+ STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL);
return 0;
}
pl_trace_flush (this, frame, fd);
- if (owner == 0) {
+ if (frame->root->lk_owner.len == 0) {
/* Handle special case when protocol/server sets lk-owner to zero.
* This usually happens due to a client disconnection. Hence, free
* all locks opened with this fd.
@@ -436,8 +983,8 @@ pl_flush (call_frame_t *frame, xlator_t *this,
}
pthread_mutex_lock (&pl_inode->mutex);
{
- __delete_locks_of_owner (pl_inode, frame->root->trans,
- owner);
+ __delete_locks_of_owner (pl_inode, frame->root->client,
+ &frame->root->lk_owner);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -447,14 +994,14 @@ pl_flush (call_frame_t *frame, xlator_t *this,
wind:
STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd);
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
return 0;
}
int
pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
pl_fdctx_t *fdctx = NULL;
@@ -469,7 +1016,7 @@ pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
unwind:
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -477,12 +1024,11 @@ unwind:
int
pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
- /* why isn't O_TRUNC being handled ? */
STACK_WIND (frame, pl_open_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- loc, flags & ~O_TRUNC, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
@@ -492,7 +1038,7 @@ int
pl_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
pl_fdctx_t *fdctx = NULL;
@@ -508,7 +1054,7 @@ pl_create_cbk (call_frame_t *frame, void *cookie,
unwind:
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -516,11 +1062,12 @@ unwind:
int
pl_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
STACK_WIND (frame, pl_create_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
@@ -529,10 +1076,10 @@ int
pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref);
+ vector, count, stbuf, iobref, xdata);
return 0;
}
@@ -540,9 +1087,10 @@ pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -600,12 +1148,12 @@ __rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
int
-pl_readv_cont (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -613,7 +1161,7 @@ pl_readv_cont (call_frame_t *frame, xlator_t *this,
int
pl_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
posix_locks_private_t *priv = NULL;
pl_inode_t *pl_inode = NULL;
@@ -630,7 +1178,7 @@ pl_readv (call_frame_t *frame, xlator_t *this,
if (priv->mandatory && pl_inode->mandatory) {
region.fl_start = offset;
region.fl_end = offset + size - 1;
- region.transport = frame->root->trans;
+ region.client = frame->root->client;
region.fd_num = fd_to_fdnum(fd);
region.client_pid = frame->root->pid;
region.owner = frame->root->lk_owner;
@@ -660,7 +1208,8 @@ pl_readv (call_frame_t *frame, xlator_t *this,
}
rw->stub = fop_readv_stub (frame, pl_readv_cont,
- fd, size, offset);
+ fd, size, offset, flags,
+ xdata);
if (!rw->stub) {
op_errno = ENOMEM;
op_ret = -1;
@@ -680,12 +1229,12 @@ pl_readv (call_frame_t *frame, xlator_t *this,
if (wind_needed) {
STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
}
if (op_ret == -1)
STACK_UNWIND_STRICT (readv, frame, -1, op_errno,
- NULL, 0, NULL, NULL);
+ NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -694,11 +1243,11 @@ pl_readv (call_frame_t *frame, xlator_t *this,
int
pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
@@ -707,7 +1256,7 @@ pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
int
pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
posix_locks_private_t *priv = NULL;
pl_inode_t *pl_inode = NULL;
@@ -717,14 +1266,13 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
int op_errno = 0;
char wind_needed = 1;
-
priv = this->private;
pl_inode = pl_inode_get (this, fd->inode);
if (priv->mandatory && pl_inode->mandatory) {
region.fl_start = offset;
region.fl_end = offset + iov_length (vector, count) - 1;
- region.transport = frame->root->trans;
+ region.client = frame->root->client;
region.fd_num = fd_to_fdnum(fd);
region.client_pid = frame->root->pid;
region.owner = frame->root->lk_owner;
@@ -755,7 +1303,7 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
rw->stub = fop_writev_stub (frame, pl_writev_cont,
fd, vector, count, offset,
- iobref);
+ flags, iobref, xdata);
if (!rw->stub) {
op_errno = ENOMEM;
op_ret = -1;
@@ -775,10 +1323,11 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (wind_needed)
STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
if (op_ret == -1)
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -804,8 +1353,8 @@ lock_dup (posix_lock_t *lock)
{
posix_lock_t *new_lock = NULL;
- new_lock = new_posix_lock (&lock->user_flock, lock->transport,
- lock->client_pid, lock->owner,
+ new_lock = new_posix_lock (&lock->user_flock, lock->client,
+ lock->client_pid, &lock->owner,
(fd_t *)lock->fd_num);
return new_lock;
}
@@ -962,22 +1511,15 @@ unlock:
int
pl_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- void *transport = NULL;
- pid_t client_pid = 0;
- uint64_t owner = 0;
- pl_inode_t *pl_inode = NULL;
- int op_ret = 0;
- int op_errno = 0;
- int can_block = 0;
- posix_lock_t *reqlock = NULL;
- posix_lock_t *conf = NULL;
- int ret = 0;
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
+ pl_inode_t *pl_inode = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+ int can_block = 0;
+ posix_lock_t *reqlock = NULL;
+ posix_lock_t *conf = NULL;
+ int ret = 0;
if ((flock->l_start < 0) || (flock->l_len < 0)) {
op_ret = -1;
@@ -992,8 +1534,8 @@ pl_lk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
- reqlock = new_posix_lock (flock, transport, client_pid,
- owner, fd);
+ reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, fd);
if (!reqlock) {
op_ret = -1;
@@ -1102,7 +1644,7 @@ pl_lk (call_frame_t *frame, xlator_t *this,
can_block);
if (ret == -1) {
- if ((can_block) && (F_UNLCK != reqlock->fl_type)) {
+ if ((can_block) && (F_UNLCK != flock->l_type)) {
pl_trace_block (this, frame, fd, NULL, cmd, flock, NULL);
goto out;
}
@@ -1110,13 +1652,22 @@ pl_lk (call_frame_t *frame, xlator_t *this,
op_ret = -1;
op_errno = EAGAIN;
__destroy_lock (reqlock);
+
+ } else if ((0 == ret) && (F_UNLCK == flock->l_type)) {
+ /* For NLM's last "unlock on fd" detection */
+ if (pl_locks_by_fd (pl_inode, fd))
+ flock->l_type = F_RDLCK;
+ else
+ flock->l_type = F_UNLCK;
}
}
unwind:
pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
pl_update_refkeeper (this, fd->inode);
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock);
+
+
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata);
out:
return 0;
}
@@ -1193,7 +1744,7 @@ pl_forget (xlator_t *this,
list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) {
__delete_inode_lock (ino_l);
- __destroy_inode_lock (ino_l);
+ __pl_inodelk_unref (ino_l);
}
list_splice_init (&dom->blocked_inodelks, &inodelks_released);
@@ -1207,8 +1758,8 @@ pl_forget (xlator_t *this,
list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) {
list_del_init (&entry_l->domain_list);
- if (entry_l->basename)
- GF_FREE ((char *)entry_l->basename);
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
GF_FREE (entry_l);
}
@@ -1227,21 +1778,22 @@ pl_forget (xlator_t *this,
list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) {
- STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0, &ext_l->user_flock);
+ STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0,
+ &ext_l->user_flock, NULL);
__destroy_lock (ext_l);
}
list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) {
- STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0);
- __destroy_inode_lock (ino_l);
+ STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref (ino_l);
}
list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) {
- STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0);
- if (entry_l->basename)
- GF_FREE ((char *)entry_l->basename);
+ STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
GF_FREE (entry_l);
}
@@ -1317,7 +1869,7 @@ out:
return ret;
}
-static int32_t
+int32_t
__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
{
posix_lock_t *lock = NULL;
@@ -1325,16 +1877,6 @@ __get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
list_for_each_entry (lock, &pl_inode->ext_list, list) {
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" state: %s",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len,
- lock->blocked == 1 ? "Blocked" : "Active");
-
count++;
}
@@ -1367,6 +1909,24 @@ out:
}
void
+pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent,
+ char *basename, dict_t *dict)
+{
+ uint32_t entrylk = 0;
+ int ret = -1;
+
+ if (!parent || !basename || !strlen (basename))
+ goto out;
+ entrylk = check_entrylk_on_basename (this, parent, basename);
+out:
+ ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK);
+ }
+}
+
+void
pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
dict_t *dict)
{
@@ -1383,19 +1943,34 @@ pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
}
void
-pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict,
+ gf_boolean_t per_dom)
{
int32_t count = 0;
int ret = -1;
+ char *domname = NULL;
+
+
+ if (per_dom){
+ ret = dict_get_str (dict, GLUSTERFS_INODELK_DOM_COUNT,
+ &domname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "value for key %s",GLUSTERFS_INODELK_DOM_COUNT);
+ goto out;
+ }
+ }
+
+ count = get_inodelk_count (this, inode, domname);
- count = get_inodelk_count (this, inode);
ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_INODELK_COUNT);
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to set count for "
+ "key %s", GLUSTERFS_INODELK_COUNT);
}
+out:
+ return;
}
void
@@ -1422,50 +1997,57 @@ pl_lookup_cbk (call_frame_t *frame,
int32_t op_errno,
inode_t *inode,
struct iatt *buf,
- dict_t *dict,
+ dict_t *xdata,
struct iatt *postparent)
{
pl_local_t *local = NULL;
GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
- if (op_ret) {
+ if (op_ret)
goto out;
- }
local = frame->local;
+ if (local->parent_entrylk_req)
+ pl_parent_entrylk_xattr_fill (this, local->loc.parent,
+ (char*)local->loc.name, xdata);
if (local->entrylk_count_req)
- pl_entrylk_xattr_fill (this, inode, dict);
+ pl_entrylk_xattr_fill (this, inode, xdata);
if (local->inodelk_count_req)
- pl_inodelk_xattr_fill (this, inode, dict);
+ pl_inodelk_xattr_fill (this, inode, xdata, _gf_false);
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill (this, inode, xdata, _gf_true);
if (local->posixlk_count_req)
- pl_posixlk_xattr_fill (this, inode, dict);
+ pl_posixlk_xattr_fill (this, inode, xdata);
+out:
+ local = frame->local;
frame->local = NULL;
- if (local != NULL)
- GF_FREE (local);
+ if (local != NULL) {
+ loc_wipe (&local->loc);
+ mem_put (local);
+ }
-out:
STACK_UNWIND_STRICT (
lookup,
frame,
- op_ret,
- op_errno,
- inode,
- buf,
- dict,
- postparent);
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
return 0;
}
int32_t
pl_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xdata)
{
pl_local_t *local = NULL;
int ret = -1;
@@ -1474,39 +2056,114 @@ pl_lookup (call_frame_t *frame,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- local = GF_CALLOC (1, sizeof (*local), gf_locks_mt_pl_local_t);
+ local = mem_get0 (this->local_pool);
GF_VALIDATE_OR_GOTO (this->name, local, out);
- if (xattr_req) {
- if (dict_get (xattr_req, GLUSTERFS_ENTRYLK_COUNT))
+ if (xdata) {
+ if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT))
local->entrylk_count_req = 1;
- if (dict_get (xattr_req, GLUSTERFS_INODELK_COUNT))
+ if (dict_get (xdata, GLUSTERFS_INODELK_COUNT))
local->inodelk_count_req = 1;
- if (dict_get (xattr_req, GLUSTERFS_POSIXLK_COUNT))
+ if (dict_get (xdata, GLUSTERFS_INODELK_DOM_COUNT))
+ local->inodelk_dom_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT))
local->posixlk_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_PARENT_ENTRYLK))
+ local->parent_entrylk_req = 1;
}
frame->local = local;
+ loc_copy (&local->loc, loc);
STACK_WIND (frame,
pl_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
+ loc, xdata);
ret = 0;
out:
if (ret == -1)
- STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+int
+pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ gf_dirent_t *entry = NULL;
+
+ local = frame->local;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if (local->entrylk_count_req)
+ pl_entrylk_xattr_fill (this, entry->inode, entry->dict);
+ if (local->inodelk_count_req)
+ pl_inodelk_xattr_fill (this, entry->inode, entry->dict,
+ _gf_false);
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill (this, entry->inode, entry->dict,
+ _gf_true);
+ if (local->posixlk_count_req)
+ pl_posixlk_xattr_fill (this, entry->inode, entry->dict);
+ }
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ if (local)
+ mem_put (local);
+
+ return 0;
+}
+
+int
+pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ pl_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ if (dict) {
+ if (dict_get (dict, GLUSTERFS_ENTRYLK_COUNT))
+ local->entrylk_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_INODELK_COUNT))
+ local->inodelk_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_INODELK_DOM_COUNT))
+ local->inodelk_dom_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_POSIXLK_COUNT))
+ local->posixlk_count_req = 1;
+ }
+ frame->local = local;
+
+ STACK_WIND (frame, pl_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
+
void
pl_dump_lock (char *str, int size, struct gf_flock *flock,
- uint64_t owner, void *trans)
+ gf_lkowner_t *owner, void *trans, char *conn_id,
+ time_t *granted_time, time_t *blkd_time, gf_boolean_t active)
{
- char *type_str = NULL;
+ char *type_str = NULL;
+ char granted[32] = {0,};
+ char blocked[32] = {0,};
switch (flock->l_type) {
case F_RDLCK:
@@ -1523,13 +2180,35 @@ pl_dump_lock (char *str, int size, struct gf_flock *flock,
break;
}
- snprintf (str, size, "type=%s, start=%llu, len=%llu, pid=%llu, lk-owner=%llu, transport=%p",
- type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- (unsigned long long) owner,
- trans);
-
+ if (active) {
+ if (blkd_time && *blkd_time == 0) {
+ snprintf (str, size, RANGE_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (granted_time, granted));
+ } else {
+ snprintf (str, size, RANGE_BLKD_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (blkd_time, blocked),
+ ctime_r (granted_time, granted));
+ }
+ }
+ else {
+ snprintf (str, size, RANGE_BLKD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (blkd_time, blocked));
+ }
}
@@ -1538,8 +2217,10 @@ __dump_entrylks (pl_inode_t *pl_inode)
{
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *lock = NULL;
- int count = 0;
- char key[GF_DUMP_MAX_BUF_LEN];
+ char blocked[32] = {0,};
+ char granted[32] = {0,};
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
char tmp[256];
@@ -1548,7 +2229,7 @@ __dump_entrylks (pl_inode_t *pl_inode)
count = 0;
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain",
+ "lock-dump.domain",
"domain");
gf_proc_dump_write(key, "%s", dom->domain);
@@ -1556,12 +2237,25 @@ __dump_entrylks (pl_inode_t *pl_inode)
gf_proc_dump_build_key(key,
"xlator.feature.locks.lock-dump.domain.entrylk",
- "entrylk[%d](ACTIVE)",count );
- snprintf (tmp, 256," %s on %s owner=%llu, transport=%p",
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename,
- (unsigned long long) lock->owner,
- lock->trans);
+ "entrylk[%d](ACTIVE)", count );
+ if (lock->blkd_time.tv_sec == 0 && lock->blkd_time.tv_usec == 0) {
+ snprintf (tmp, 256, ENTRY_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK", lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->granted_time.tv_sec, granted));
+ } else {
+ snprintf (tmp, 256, ENTRY_BLKD_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK", lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->blkd_time.tv_sec, blocked),
+ ctime_r (&lock->granted_time.tv_sec, granted));
+ }
gf_proc_dump_write(key, tmp);
@@ -1572,12 +2266,14 @@ __dump_entrylks (pl_inode_t *pl_inode)
gf_proc_dump_build_key(key,
"xlator.feature.locks.lock-dump.domain.entrylk",
- "entrylk[%d](BLOCKED)",count );
- snprintf (tmp, 256," %s on %s owner=%llu, transport=%p,"
- " state = Blocked",
+ "entrylk[%d](BLOCKED)", count );
+ snprintf (tmp, 256, ENTRY_BLKD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
"ENTRYLK_WRLCK", lock->basename,
- (unsigned long long) lock->owner, lock->trans);
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->blkd_time.tv_sec, blocked));
gf_proc_dump_write(key, tmp);
@@ -1614,18 +2310,23 @@ __dump_inodelks (pl_inode_t *pl_inode)
count = 0;
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain",
+ "lock-dump.domain",
"domain");
gf_proc_dump_write(key, "%s", dom->domain);
list_for_each_entry (lock, &dom->inodelk_list, list) {
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.inodelk",
+ "inodelk",
"inodelk[%d](ACTIVE)",count );
+ SET_FLOCK_PID (&lock->user_flock, lock);
pl_dump_lock (tmp, 256, &lock->user_flock,
- lock->owner, lock->transport);
+ &lock->owner,
+ lock->client, lock->connection_id,
+ &lock->granted_time.tv_sec,
+ &lock->blkd_time.tv_sec,
+ _gf_true);
gf_proc_dump_write(key, tmp);
count++;
@@ -1634,10 +2335,14 @@ __dump_inodelks (pl_inode_t *pl_inode)
list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.inodelk",
+ "inodelk",
"inodelk[%d](BLOCKED)",count );
+ SET_FLOCK_PID (&lock->user_flock, lock);
pl_dump_lock (tmp, 256, &lock->user_flock,
- lock->owner, lock->transport);
+ &lock->owner,
+ lock->client, lock->connection_id,
+ 0, &lock->blkd_time.tv_sec,
+ _gf_false);
gf_proc_dump_write(key, tmp);
count++;
@@ -1669,20 +2374,20 @@ __dump_posixlks (pl_inode_t *pl_inode)
list_for_each_entry (lock, &pl_inode->ext_list, list) {
+ SET_FLOCK_PID (&lock->user_flock, lock);
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.posixlk",
+ "posixlk",
"posixlk[%d](%s)",
count,
lock->blocked ? "BLOCKED" : "ACTIVE");
pl_dump_lock (tmp, 256, &lock->user_flock,
- lock->owner, lock->transport);
+ &lock->owner, lock->client, NULL,
+ &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
+ (lock->blocked)? _gf_false: _gf_true);
gf_proc_dump_write(key, tmp);
count++;
}
-
-
-
}
void
@@ -1703,77 +2408,82 @@ pl_dump_inode_priv (xlator_t *this, inode_t *inode)
int ret = -1;
uint64_t tmp_pl_inode = 0;
pl_inode_t *pl_inode = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
+ char *pathname = NULL;
+ gf_boolean_t section_added = _gf_false;
int count = 0;
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
-
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (!inode) {
+ errno = EINVAL;
+ goto out;
+ }
- if (ret != 0)
+ ret = TRY_LOCK (&inode->lock);
+ if (ret)
+ goto out;
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret)
+ goto unlock;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+ if (ret)
goto out;
pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
-
if (!pl_inode) {
ret = -1;
goto out;
}
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.inode",
- "%ld.mandatory",inode->ino);
- gf_proc_dump_write(key, "%d", pl_inode->mandatory);
-
+ gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name);
+ section_added = _gf_true;
- count = get_entrylk_count (this, inode);
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.entrylk-count",
- "%ld.entrylk-count", inode->ino);
- gf_proc_dump_write(key, "%d", count);
-
- dump_entrylks(pl_inode);
-
- count = get_inodelk_count (this, inode);
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.inodelk-count",
- "%ld.inodelk-count", inode->ino);
- gf_proc_dump_write(key, "%d", count);
+ /*We are safe to call __inode_path since we have the
+ * inode->table->lock */
+ __inode_path (inode, NULL, &pathname);
+ if (pathname)
+ gf_proc_dump_write ("path", "%s", pathname);
- dump_inodelks(pl_inode);
+ gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory);
- count = get_posixlk_count (this, inode);
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.posixlk-count",
- "%ld.posixlk-count", inode->ino);
- gf_proc_dump_write(key, "%d", count);
+ ret = pthread_mutex_trylock (&pl_inode->mutex);
+ if (ret)
+ goto out;
+ {
+ count = __get_entrylk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("entrylk-count", "%d", count);
+ __dump_entrylks (pl_inode);
+ }
- dump_posixlks(pl_inode);
+ count = __get_inodelk_count (this, pl_inode, NULL);
+ if (count) {
+ gf_proc_dump_write("inodelk-count", "%d", count);
+ __dump_inodelks (pl_inode);
+ }
+ count = __get_posixlk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("posixlk-count", "%d", count);
+ __dump_posixlks (pl_inode);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
out:
- return ret;
-}
-
-
-
-/*
- * pl_dump_inode - inode dump function for posix locks
- *
- */
-int
-pl_dump_inode (xlator_t *this)
-{
-
- GF_ASSERT (this);
-
- if (this->itable) {
- inode_table_dump(this->itable,
- "xlator.features.locks.inode_table");
+ GF_FREE (pathname);
+
+ if (ret && inode) {
+ if (!section_added)
+ gf_proc_dump_add_section ("xlator.features.locks.%s."
+ "inode", this->name);
+ gf_proc_dump_write ("Unable to print lock state", "(Lock "
+ "acquisition failure) %s",
+ uuid_utoa (inode->gfid));
}
-
- return 0;
+ return ret;
}
int32_t
@@ -1795,6 +2505,124 @@ mem_acct_init (xlator_t *this)
return ret;
}
+
+pl_ctx_t*
+pl_ctx_get (client_t *client, xlator_t *xlator)
+{
+ void *tmp = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ client_ctx_get (client, xlator, &tmp);
+
+ ctx = tmp;
+
+ if (ctx != NULL)
+ goto out;
+
+ ctx = GF_CALLOC (1, sizeof (pl_ctx_t), gf_locks_mt_posix_lock_t);
+
+ if (ctx == NULL)
+ goto out;
+
+ ctx->ltable = pl_lock_table_new();
+
+ if (ctx->ltable == NULL) {
+ GF_FREE (ctx);
+ ctx = NULL;
+ goto out;
+ }
+
+ LOCK_INIT (&ctx->ltable_lock);
+
+ if (client_ctx_set (client, xlator, ctx) != 0) {
+ LOCK_DESTROY (&ctx->ltable_lock);
+ GF_FREE (ctx->ltable);
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+out:
+ return ctx;
+}
+
+static void
+ltable_delete_locks (struct _lock_table *ltable)
+{
+ struct _locker *locker = NULL;
+ struct _locker *tmp = NULL;
+
+ list_for_each_entry_safe (locker, tmp, &ltable->inodelk_lockers, lockers) {
+ if (locker->fd)
+ pl_del_locker (ltable, locker->volume, &locker->loc,
+ locker->fd, &locker->owner,
+ GF_FOP_INODELK);
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
+ }
+
+ list_for_each_entry_safe (locker, tmp, &ltable->entrylk_lockers, lockers) {
+ if (locker->fd)
+ pl_del_locker (ltable, locker->volume, &locker->loc,
+ locker->fd, &locker->owner,
+ GF_FOP_ENTRYLK);
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
+ }
+ GF_FREE (ltable);
+}
+
+
+static int32_t
+destroy_cbk (xlator_t *this, client_t *client)
+{
+ void *tmp = NULL;
+ pl_ctx_t *locks_ctx = NULL;
+
+ client_ctx_del (client, this, &tmp);
+
+ if (tmp == NULL)
+ return 0
+;
+ locks_ctx = tmp;
+ if (locks_ctx->ltable)
+ ltable_delete_locks (locks_ctx->ltable);
+
+ LOCK_DESTROY (&locks_ctx->ltable_lock);
+ GF_FREE (locks_ctx);
+
+ return 0;
+}
+
+
+static int32_t
+disconnect_cbk (xlator_t *this, client_t *client)
+{
+ int32_t ret = 0;
+ pl_ctx_t *locks_ctx = NULL;
+ struct _lock_table *ltable = NULL;
+
+ locks_ctx = pl_ctx_get (client, this);
+ if (locks_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto out;
+ }
+
+ LOCK (&locks_ctx->ltable_lock);
+ {
+ if (locks_ctx->ltable) {
+ ltable = locks_ctx->ltable;
+ locks_ctx->ltable = pl_lock_table_new ();
+ }
+ }
+ UNLOCK (&locks_ctx->ltable_lock);
+
+ if (ltable)
+ ltable_delete_locks (ltable);
+
+out:
+ return ret;
+}
+
+
int
init (xlator_t *this)
{
@@ -1823,7 +2651,7 @@ init (xlator_t *this)
gf_log (this->name, GF_LOG_CRITICAL,
"'locks' translator is not loaded over a storage "
"translator");
- goto out;;
+ goto out;
}
priv = GF_CALLOC (1, sizeof (*priv),
@@ -1844,13 +2672,20 @@ init (xlator_t *this)
}
}
+ this->local_pool = mem_pool_new (pl_local_t, 32);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
this->private = priv;
ret = 0;
out:
if (ret) {
- if (priv)
- GF_FREE (priv);
+ GF_FREE (priv);
}
return ret;
}
@@ -1865,6 +2700,7 @@ fini (xlator_t *this)
if (!priv)
return 0;
this->private = NULL;
+ GF_FREE (priv->brickname);
GF_FREE (priv);
return 0;
@@ -1873,21 +2709,23 @@ fini (xlator_t *this)
int
pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock);
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock);
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
pl_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int
pl_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
struct xlator_fops fops = {
.lookup = pl_lookup,
@@ -1904,6 +2742,10 @@ struct xlator_fops fops = {
.fentrylk = pl_fentrylk,
.flush = pl_flush,
.opendir = pl_opendir,
+ .readdirp = pl_readdirp,
+ .getxattr = pl_getxattr,
+ .fgetxattr = pl_fgetxattr,
+ .fsetxattr = pl_fsetxattr,
};
struct xlator_dumpops dumpops = {
@@ -1911,9 +2753,11 @@ struct xlator_dumpops dumpops = {
};
struct xlator_cbks cbks = {
- .forget = pl_forget,
- .release = pl_release,
- .releasedir = pl_releasedir,
+ .forget = pl_forget,
+ .release = pl_release,
+ .releasedir = pl_releasedir,
+ .client_destroy = destroy_cbk,
+ .client_disconnect = disconnect_cbk,
};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
index 4aac1803d..11abd26d8 100644
--- a/xlators/features/locks/src/reservelk.c
+++ b/xlators/features/locks/src/reservelk.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -81,10 +71,10 @@ out:
return ret_lock;
}
-static int
+static inline int
__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2)
{
- return ((l1->owner == l2->owner));
+ return (is_same_lkowner (&l1->owner, &l2->owner));
}
@@ -187,10 +177,10 @@ __lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
list_add_tail (&lock->list, &pl_inode->blocked_reservelks);
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
@@ -292,14 +282,15 @@ grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode)
list_for_each_entry_safe (lock, tmp, &granted, list) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
- STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock,
+ NULL);
}
}
@@ -376,7 +367,9 @@ grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode)
pl_trace_out (this, lock->frame, fd, NULL, cmd,
&lock->user_flock, -1, EAGAIN, NULL);
pl_update_refkeeper (this, fd->inode);
- STACK_UNWIND_STRICT (lk, lock->frame, -1, EAGAIN, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, -1,
+ EAGAIN, &lock->user_flock,
+ NULL);
__destroy_lock (lock);
}
}
@@ -429,18 +422,18 @@ pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
ret = __lock_reservelk (this, pl_inode, lock, can_block);
if (ret < 0)
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
else
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->fl_start,
lock->fl_end);
diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c
index e95612ad4..d2cca32de 100644
--- a/xlators/features/locks/tests/unit-test.c
+++ b/xlators/features/locks/tests/unit-test.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/features/mac-compat/src/Makefile.am b/xlators/features/mac-compat/src/Makefile.am
index 915c13e30..f8567edce 100644
--- a/xlators/features/mac-compat/src/Makefile.am
+++ b/xlators/features/mac-compat/src/Makefile.am
@@ -1,13 +1,14 @@
xlator_LTLIBRARIES = mac-compat.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-mac_compat_la_LDFLAGS = -module -avoidversion
+mac_compat_la_LDFLAGS = -module -avoid-version
mac_compat_la_SOURCES = mac-compat.c
mac_compat_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/mac-compat/src/mac-compat.c b/xlators/features/mac-compat/src/mac-compat.c
index 188b593e4..7cb550ad5 100644
--- a/xlators/features/mac-compat/src/mac-compat.c
+++ b/xlators/features/mac-compat/src/mac-compat.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -57,7 +47,8 @@ static int32_t apple_xattr_len[] = {
int32_t
maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
intptr_t ax = (intptr_t)this->private;
int i = 0;
@@ -80,7 +71,7 @@ maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
@@ -88,7 +79,7 @@ maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -109,14 +100,14 @@ maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame, maccomp_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
int32_t
maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -137,21 +128,21 @@ maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
STACK_WIND (frame, maccomp_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr,
- fd, name);
+ fd, name, xdata);
return 0;
}
int32_t
maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
intptr_t ax = (intptr_t)this->private;
if (op_ret == -1 && ax != GF_XATTR_NONE)
op_ret = op_errno = 0;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -159,7 +150,7 @@ maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -177,14 +168,14 @@ maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
STACK_WIND (frame, maccomp_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
int32_t
maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -202,7 +193,7 @@ maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
STACK_WIND (frame, maccomp_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags);
+ fd, dict, flags, xdata);
return 0;
}
@@ -239,8 +230,7 @@ struct xlator_fops fops = {
.fsetxattr = maccomp_fsetxattr,
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
{ .key = {NULL} },
diff --git a/xlators/features/marker/Makefile.am b/xlators/features/marker/Makefile.am
index a6ba2de16..a985f42a8 100644
--- a/xlators/features/marker/Makefile.am
+++ b/xlators/features/marker/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = src @SYNCDAEMON_SUBDIR@
+SUBDIRS = src
CLEANFILES =
diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am
index 501586a76..a7c676472 100644
--- a/xlators/features/marker/src/Makefile.am
+++ b/xlators/features/marker/src/Makefile.am
@@ -1,15 +1,17 @@
xlator_LTLIBRARIES = marker.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-marker_la_LDFLAGS = -module -avoidversion
+marker_la_LDFLAGS = -module -avoid-version
marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c marker-common.c
marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h marker-quota-helper.h marker-common.h $(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c
index a413781bc..84a718add 100644
--- a/xlators/features/marker/src/marker-common.c
+++ b/xlators/features/marker/src/marker-common.c
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -69,18 +60,10 @@ unlock: UNLOCK (&inode->lock);
return ret;
}
-void
+int
marker_filter_quota_xattr (dict_t *dict, char *key,
data_t *value, void *data)
{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("marker", dict, out);
- GF_VALIDATE_OR_GOTO ("marker", key, out);
-
- ret = fnmatch ("trusted.glusterfs.quota*", key, 0);
- if (ret == 0)
- dict_del (dict, key);
-out:
- return;
+ dict_del (dict, key);
+ return 0;
}
diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h
index 0a7ee2619..23dd846cb 100644
--- a/xlators/features/marker/src/marker-common.h
+++ b/xlators/features/marker/src/marker-common.h
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _MARKER_COMMON_H
#define _MARKER_COMMON_H
@@ -31,6 +22,6 @@
int32_t
marker_force_inode_ctx_get (inode_t *, xlator_t *, marker_inode_ctx_t **);
-void
+int
marker_filter_quota_xattr (dict_t *, char *, data_t *, void *);
#endif
diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h
index f2723dc26..1f74d5048 100644
--- a/xlators/features/marker/src/marker-mem-types.h
+++ b/xlators/features/marker/src/marker-mem-types.h
@@ -1,36 +1,24 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MARKER_MEM_TYPES_H__
#define __MARKER_MEM_TYPES_H__
#include "mem-types.h"
enum gf_marker_mem_types_ {
- gf_marker_mt_marker_local_t = gf_common_mt_end + 1,
- gf_marker_mt_marker_conf_t,
+ gf_marker_mt_marker_conf_t = gf_common_mt_end + 1,
gf_marker_mt_loc_t,
gf_marker_mt_volume_mark,
gf_marker_mt_int64_t,
gf_marker_mt_quota_inode_ctx_t,
gf_marker_mt_marker_inode_ctx_t,
- gf_marker_mt_quota_local_t,
gf_marker_mt_inode_contribution_t,
gf_marker_mt_end
};
diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c
index 2d5b234d1..af5fed132 100644
--- a/xlators/features/marker/src/marker-quota-helper.c
+++ b/xlators/features/marker/src/marker-quota-helper.c
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -28,16 +19,19 @@
#include "marker-mem-types.h"
int
-quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+mq_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
{
int ret = -1;
- if (!loc)
- return ret;
+ GF_VALIDATE_OR_GOTO ("marker", loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", inode, out);
+ GF_VALIDATE_OR_GOTO ("marker", path, out);
+ /* Not checking for parent because while filling
+ * loc of root, parent will be NULL
+ */
if (inode) {
loc->inode = inode_ref (inode);
- loc->ino = inode->ino;
}
if (parent)
@@ -59,13 +53,13 @@ quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
loc_wipe:
if (ret < 0)
loc_wipe (loc);
-
+out:
return ret;
}
int32_t
-quota_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc)
+mq_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc)
{
char *resolvedpath = NULL;
inode_t *parent = NULL;
@@ -74,7 +68,7 @@ quota_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc)
if ((!inode) || (!loc))
return ret;
- if ((inode) && (inode->ino == 1)) {
+ if ((inode) && __is_root_gfid (inode->gfid)) {
loc->parent = NULL;
goto ignore_parent;
}
@@ -93,7 +87,7 @@ ignore_parent:
if (ret < 0)
goto err;
- ret = quota_loc_fill (loc, inode, parent, resolvedpath);
+ ret = mq_loc_fill (loc, inode, parent, resolvedpath);
if (ret < 0)
goto err;
@@ -108,7 +102,7 @@ err:
quota_inode_ctx_t *
-quota_alloc_inode_ctx ()
+mq_alloc_inode_ctx ()
{
int32_t ret = -1;
quota_inode_ctx_t *ctx = NULL;
@@ -127,13 +121,13 @@ out:
}
inode_contribution_t *
-get_contribution_node (inode_t *inode, quota_inode_ctx_t *ctx)
+mq_get_contribution_node (inode_t *inode, quota_inode_ctx_t *ctx)
{
inode_contribution_t *contri = NULL;
inode_contribution_t *temp = NULL;
- GF_VALIDATE_OR_GOTO ("marker", inode, out);
- GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+ if (!inode || !ctx)
+ goto out;
list_for_each_entry (temp, &ctx->contribution_head, contri_list) {
if (uuid_compare (temp->gfid, inode->gfid) == 0) {
@@ -147,8 +141,8 @@ out:
int32_t
-delete_contribution_node (dict_t *dict, char *key,
- inode_contribution_t *contribution)
+mq_delete_contribution_node (dict_t *dict, char *key,
+ inode_contribution_t *contribution)
{
if (dict_get (dict, key) != NULL)
goto out;
@@ -160,13 +154,25 @@ out:
inode_contribution_t *
-__add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
{
int32_t ret = 0;
inode_contribution_t *contribution = NULL;
+ if (!loc->parent) {
+ if (!uuid_is_null (loc->pargfid))
+ loc->parent = inode_find (loc->inode->table,
+ loc->pargfid);
+ if (!loc->parent)
+ loc->parent = inode_parent (loc->inode, loc->pargfid,
+ loc->name);
+ if (!loc->parent)
+ goto out;
+ }
+
list_for_each_entry (contribution, &ctx->contribution_head, contri_list) {
- if (uuid_compare (contribution->gfid, loc->parent->gfid) == 0) {
+ if (loc->parent &&
+ uuid_compare (contribution->gfid, loc->parent->gfid) == 0) {
goto out;
}
}
@@ -180,6 +186,7 @@ __add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
uuid_copy (contribution->gfid, loc->parent->gfid);
LOCK_INIT (&contribution->lock);
+ INIT_LIST_HEAD (&contribution->contri_list);
list_add_tail (&contribution->contri_list, &ctx->contribution_head);
@@ -189,7 +196,7 @@ out:
inode_contribution_t *
-add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
{
inode_contribution_t *contribution = NULL;
@@ -201,7 +208,7 @@ add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
LOCK (&ctx->lock);
{
- contribution = __add_new_contribution_node (this, ctx, loc);
+ contribution = __mq_add_new_contribution_node (this, ctx, loc);
}
UNLOCK (&ctx->lock);
@@ -210,8 +217,8 @@ add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
int32_t
-dict_set_contribution (xlator_t *this, dict_t *dict,
- loc_t *loc)
+mq_dict_set_contribution (xlator_t *this, dict_t *dict,
+ loc_t *loc)
{
int32_t ret = -1;
char contri_key [512] = {0, };
@@ -219,6 +226,7 @@ dict_set_contribution (xlator_t *this, dict_t *dict,
GF_VALIDATE_OR_GOTO ("marker", this, out);
GF_VALIDATE_OR_GOTO ("marker", dict, out);
GF_VALIDATE_OR_GOTO ("marker", loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc->parent, out);
GET_CONTRI_KEY (contri_key, loc->parent->gfid, ret);
if (ret < 0) {
@@ -241,8 +249,8 @@ out:
int32_t
-quota_inode_ctx_get (inode_t *inode, xlator_t *this,
- quota_inode_ctx_t **ctx)
+mq_inode_ctx_get (inode_t *inode, xlator_t *this,
+ quota_inode_ctx_t **ctx)
{
int32_t ret = -1;
uint64_t ctx_int = 0;
@@ -275,7 +283,7 @@ out:
quota_inode_ctx_t *
-__quota_inode_ctx_new (inode_t *inode, xlator_t *this)
+__mq_inode_ctx_new (inode_t *inode, xlator_t *this)
{
int32_t ret = -1;
quota_inode_ctx_t *quota_ctx = NULL;
@@ -291,7 +299,7 @@ __quota_inode_ctx_new (inode_t *inode, xlator_t *this)
LOCK (&inode->lock);
{
if (mark_ctx->quota_ctx == NULL) {
- quota_ctx = quota_alloc_inode_ctx ();
+ quota_ctx = mq_alloc_inode_ctx ();
if (quota_ctx == NULL) {
ret = -1;
goto unlock;
@@ -311,29 +319,23 @@ out:
quota_inode_ctx_t *
-quota_inode_ctx_new (inode_t * inode, xlator_t *this)
+mq_inode_ctx_new (inode_t * inode, xlator_t *this)
{
- return __quota_inode_ctx_new (inode, this);
+ return __mq_inode_ctx_new (inode, this);
}
quota_local_t *
-quota_local_new ()
+mq_local_new ()
{
- int32_t ret = -1;
quota_local_t *local = NULL;
- QUOTA_ALLOC (local, quota_local_t, ret);
- if (ret < 0)
+ local = mem_get0 (THIS->local_pool);
+ if (!local)
goto out;
local->ref = 1;
- local->delta = 0;
- local->err = 0;
LOCK_INIT (&local->lock);
- memset (&local->loc, 0, sizeof (loc_t));
- memset (&local->parent_loc, 0, sizeof (loc_t));
-
local->ctx = NULL;
local->contri = NULL;
@@ -342,7 +344,7 @@ out:
}
quota_local_t *
-quota_local_ref (quota_local_t *local)
+mq_local_ref (quota_local_t *local)
{
LOCK (&local->lock);
{
@@ -355,7 +357,7 @@ quota_local_ref (quota_local_t *local)
int32_t
-quota_local_unref (xlator_t *this, quota_local_t *local)
+mq_local_unref (xlator_t *this, quota_local_t *local)
{
int32_t ref = 0;
if (local == NULL)
@@ -363,7 +365,7 @@ quota_local_unref (xlator_t *this, quota_local_t *local)
QUOTA_SAFE_DECREMENT (&local->lock, local->ref, ref);
- if (ref > 0)
+ if (ref != 0)
goto out;
if (local->fd != NULL)
@@ -374,38 +376,36 @@ quota_local_unref (xlator_t *this, quota_local_t *local)
loc_wipe (&local->parent_loc);
LOCK_DESTROY (&local->lock);
+
+ mem_put (local);
out:
return 0;
}
inode_contribution_t *
-get_contribution_from_loc (xlator_t *this, loc_t *loc)
+mq_get_contribution_from_loc (xlator_t *this, loc_t *loc)
{
int32_t ret = 0;
quota_inode_ctx_t *ctx = NULL;
inode_contribution_t *contribution = NULL;
- ret = quota_inode_ctx_get (loc->inode, this, &ctx);
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
if (ret < 0) {
gf_log_callingfn (this->name, GF_LOG_WARNING,
"cannot get marker-quota context from inode "
- "(ino: %"PRId64", gfid:%s, path:%s)",
- loc->inode->ino,
- uuid_utoa (loc->inode->gfid),
- loc->path);
+ "(gfid:%s, path:%s)",
+ uuid_utoa (loc->inode->gfid), loc->path);
goto err;
}
- contribution = get_contribution_node (loc->parent, ctx);
+ contribution = mq_get_contribution_node (loc->parent, ctx);
if (contribution == NULL) {
gf_log_callingfn (this->name, GF_LOG_WARNING,
- "inode (ino:%"PRId64", gfid:%s, path:%s ) has"
- " no contribution towards parent (ino:%"PRId64
- ", gfid:%s)", loc->inode->ino,
+ "inode (gfid:%s, path:%s) has "
+ "no contribution towards parent (gfid:%s)",
uuid_utoa (loc->inode->gfid),
- loc->path, loc->parent->ino,
- uuid_utoa (loc->parent->gfid));
+ loc->path, uuid_utoa (loc->parent->gfid));
goto err;
}
diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h
index 9e9e6c241..6cdd14881 100644
--- a/xlators/features/marker/src/marker-quota-helper.h
+++ b/xlators/features/marker/src/marker-quota-helper.h
@@ -1,20 +1,13 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+
#ifndef _MARKER_QUOTA_HELPER_H
#define _MARKER_QUOTA_HELPER
@@ -23,7 +16,7 @@
#include "config.h"
#endif
-#include "marker-quota.h"
+#include "marker.h"
#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
do { \
@@ -48,36 +41,36 @@
} while (0)
inode_contribution_t *
-add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *);
+mq_add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *);
int32_t
-dict_set_contribution (xlator_t *, dict_t *, loc_t *);
+mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *);
quota_inode_ctx_t *
-quota_inode_ctx_new (inode_t *, xlator_t *);
+mq_inode_ctx_new (inode_t *, xlator_t *);
int32_t
-quota_inode_ctx_get (inode_t *, xlator_t *, quota_inode_ctx_t **);
+mq_inode_ctx_get (inode_t *, xlator_t *, quota_inode_ctx_t **);
int32_t
-delete_contribution_node (dict_t *, char *, inode_contribution_t *);
+mq_delete_contribution_node (dict_t *, char *, inode_contribution_t *);
int32_t
-quota_inode_loc_fill (const char *, inode_t *, loc_t *);
+mq_inode_loc_fill (const char *, inode_t *, loc_t *);
quota_local_t *
-quota_local_new ();
+mq_local_new ();
quota_local_t *
-quota_local_ref (quota_local_t *);
+mq_local_ref (quota_local_t *);
int32_t
-quota_local_unref (xlator_t *, quota_local_t *);
+mq_local_unref (xlator_t *, quota_local_t *);
inode_contribution_t *
-get_contribution_node (inode_t *, quota_inode_ctx_t *);
+mq_get_contribution_node (inode_t *, quota_inode_ctx_t *);
inode_contribution_t *
-get_contribution_from_loc (xlator_t *this, loc_t *loc);
+mq_get_contribution_from_loc (xlator_t *this, loc_t *loc);
#endif
diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c
index cb50d71a7..6f9af6e13 100644
--- a/xlators/features/marker/src/marker-quota.c
+++ b/xlators/features/marker/src/marker-quota.c
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -30,6 +21,46 @@
#include "marker-quota.h"
#include "marker-quota-helper.h"
+int
+mq_loc_copy (loc_t *dst, loc_t *src)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", dst, out);
+ GF_VALIDATE_OR_GOTO ("marker", src, out);
+
+ if (src->inode == NULL ||
+ src->path == NULL) {
+ gf_log ("marker", GF_LOG_WARNING,
+ "src loc is not valid");
+ goto out;
+ }
+
+ ret = loc_copy (dst, src);
+out:
+ return ret;
+}
+
+int32_t
+mq_get_local_err (quota_local_t *local,
+ int32_t *val)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", local, out);
+ GF_VALIDATE_OR_GOTO ("marker", val, out);
+
+ LOCK (&local->lock);
+ {
+ *val = local->err;
+ }
+ UNLOCK (&local->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
int32_t
mq_get_ctx_updation_status (quota_inode_ctx_t *ctx,
gf_boolean_t *status)
@@ -112,15 +143,15 @@ mq_assign_lk_owner (xlator_t *this, call_frame_t *frame)
}
UNLOCK (&conf->lock);
- frame->root->lk_owner = lk_owner;
+ set_lk_owner_from_uint64 (&frame->root->lk_owner, lk_owner);
return;
}
int32_t
-loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc,
- uint64_t ino, char *name)
+mq_loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc,
+ uint64_t ino, char *name)
{
int32_t ret = -1;
int32_t len = 0;
@@ -131,8 +162,6 @@ loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc,
GF_VALIDATE_OR_GOTO ("marker", oldloc, out);
GF_VALIDATE_OR_GOTO ("marker", name, out);
- newloc->ino = ino;
-
newloc->inode = inode_new (oldloc->inode->table);
if (!newloc->inode) {
@@ -141,6 +170,7 @@ loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc,
}
newloc->parent = inode_ref (oldloc->inode);
+ uuid_copy (newloc->pargfid, oldloc->inode->gfid);
len = strlen (oldloc->path);
@@ -168,8 +198,8 @@ out:
}
int32_t
-dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+mq_dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
QUOTA_STACK_DESTROY (frame, this);
@@ -177,18 +207,20 @@ dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this,
}
int32_t
-release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
struct gf_flock lock = {0, };
quota_local_t *local = NULL;
+ loc_t loc = {0, };
+ int ret = -1;
local = frame->local;
if (op_ret == -1) {
local->err = -1;
- dirty_inode_updation_done (frame, NULL, this, 0, 0);
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -202,32 +234,50 @@ release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *this,
lock.l_len = 0;
lock.l_pid = 0;
+ ret = loc_copy (&loc, &local->loc);
+ if (ret == -1) {
+ local->err = -1;
+ frame->local = NULL;
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
+ return 0;
+ }
+
+ if (local->loc.inode == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Inode is NULL, so can't stackwind.");
+ goto out;
+ }
+
STACK_WIND (frame,
- dirty_inode_updation_done,
+ mq_dirty_inode_updation_done,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->loc, F_SETLKW, &lock);
+ this->name, &loc, F_SETLKW, &lock, NULL);
+
+ loc_wipe (&loc);
+
+ return 0;
+out:
+ mq_dirty_inode_updation_done (frame, NULL, this, -1, 0, NULL);
return 0;
}
int32_t
-mark_inode_undirty (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+mq_mark_inode_undirty (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
int32_t ret = -1;
int64_t *size = NULL;
dict_t *newdict = NULL;
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
local = (quota_local_t *) frame->local;
if (op_ret == -1)
goto err;
- priv = (marker_conf_t *) this->private;
-
if (!dict)
goto wind;
@@ -250,17 +300,21 @@ wind:
if (ret)
goto err;
- STACK_WIND (frame, release_lock_on_dirty_inode,
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+ STACK_WIND (frame, mq_release_lock_on_dirty_inode,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- &local->loc, newdict, 0);
+ &local->loc, newdict, 0, NULL);
ret = 0;
err:
if (op_ret == -1 || ret == -1) {
local->err = -1;
- release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -270,24 +324,21 @@ err:
}
int32_t
-update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+mq_update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
{
int32_t ret = -1;
dict_t *new_dict = NULL;
int64_t *size = NULL;
int64_t *delta = NULL;
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
local = frame->local;
if (op_ret == -1)
goto err;
- priv = this->private;
-
if (dict == NULL) {
gf_log (this->name, GF_LOG_WARNING,
"Dict is null while updating the size xattr %s",
@@ -319,9 +370,14 @@ update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
if (ret)
goto err;
- STACK_WIND (frame, mark_inode_undirty, FIRST_CHILD(this),
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_mark_inode_undirty, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, &local->loc,
- GF_XATTROP_ADD_ARRAY64, new_dict);
+ GF_XATTROP_ADD_ARRAY64, new_dict, NULL);
ret = 0;
@@ -329,7 +385,7 @@ err:
if (op_ret == -1 || ret == -1) {
local->err = -1;
- release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
if (new_dict)
@@ -339,17 +395,37 @@ err:
}
int32_t
-get_dirty_inode_size (call_frame_t *frame, xlator_t *this)
+mq_test_and_set_local_err(quota_local_t *local,
+ int32_t *val)
+{
+ int tmp = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", local, out);
+ GF_VALIDATE_OR_GOTO ("marker", val, out);
+
+ LOCK (&local->lock);
+ {
+ tmp = local->err;
+ local->err = *val;
+ *val = tmp;
+ }
+ UNLOCK (&local->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+mq_get_dirty_inode_size (call_frame_t *frame, xlator_t *this)
{
int32_t ret = -1;
dict_t *dict = NULL;
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
local = (quota_local_t *) frame->local;
- priv = (marker_conf_t *) this->private;
-
dict = dict_new ();
if (!dict) {
ret = -1;
@@ -360,7 +436,12 @@ get_dirty_inode_size (call_frame_t *frame, xlator_t *this)
if (ret)
goto err;
- STACK_WIND (frame, update_size_xattr, FIRST_CHILD(this),
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_update_size_xattr, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, &local->loc, dict);
ret =0;
@@ -368,7 +449,7 @@ err:
if (ret) {
local->err = -1;
- release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
if (dict)
@@ -378,15 +459,15 @@ err:
}
int32_t
-get_child_contribution (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
+mq_get_child_contribution (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
int32_t ret = -1;
int32_t val = 0;
@@ -401,17 +482,20 @@ get_child_contribution (call_frame_t *frame,
QUOTA_STACK_DESTROY (frame, this);
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (op_errno));
-
- local->err = -2;
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ strerror (op_errno));
+ val = -2;
+ if (!mq_test_and_set_local_err (local, &val) &&
+ val != -2)
+ mq_release_lock_on_dirty_inode (local->frame, NULL,
+ this, 0, 0, NULL);
- release_lock_on_dirty_inode (local->frame, NULL, this, 0, 0);
-
- goto out;
+ goto exit;
}
- if (local->err)
- goto out;
+ ret = mq_get_local_err (local, &val);
+ if (!ret && val == -2)
+ goto exit;
GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret);
if (ret < 0)
@@ -430,29 +514,29 @@ out:
}
UNLOCK (&local->lock);
- if (val== 0) {
- if (local->err) {
- QUOTA_SAFE_DECREMENT (&local->lock, local->ref, val);
-
- quota_local_unref (this, local);
- } else
- quota_dirty_inode_readdir (local->frame, NULL, this,
- 0, 0, NULL);
+ if (val == 0) {
+ mq_dirty_inode_readdir (local->frame, NULL, this,
+ 0, 0, NULL, NULL);
}
+ mq_local_unref (this, local);
return 0;
+exit:
+ mq_local_unref (this, local);
+ return 0;
}
int32_t
-quota_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
+mq_readdir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
char contri_key [512] = {0, };
int32_t ret = 0;
+ int32_t val = 0;
off_t offset = 0;
int32_t count = 0;
dict_t *dict = NULL;
@@ -461,20 +545,20 @@ quota_readdir_cbk (call_frame_t *frame,
call_frame_t *newframe = NULL;
loc_t loc = {0, };
- local = frame->local;
+ local = mq_local_ref (frame->local);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"readdir failed %s", strerror (op_errno));
local->err = -1;
- release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
- return 0;
+ goto end;
} else if (op_ret == 0) {
- get_dirty_inode_size (frame, this);
+ mq_get_dirty_inode_size (frame, this);
- return 0;
+ goto end;
}
local->dentry_child_count = 0;
@@ -493,16 +577,20 @@ quota_readdir_cbk (call_frame_t *frame,
count++;
}
+ if (count == 0) {
+ mq_get_dirty_inode_size (frame, this);
+ goto end;
+
+ }
+
local->frame = frame;
- if (count > 0) {
- LOCK (&local->lock);
- {
- local->dentry_child_count = count;
- local->d_off = offset;
- }
- UNLOCK (&local->lock);
+ LOCK (&local->lock);
+ {
+ local->dentry_child_count = count;
+ local->d_off = offset;
}
+ UNLOCK (&local->lock);
list_for_each_entry (entry, (&entries->list), list) {
@@ -515,18 +603,29 @@ quota_readdir_cbk (call_frame_t *frame,
continue;
}
- ret = loc_fill_from_name (this, &loc, &local->loc,
- entry->d_ino, entry->d_name);
+ ret = mq_loc_fill_from_name (this, &loc, &local->loc,
+ entry->d_ino, entry->d_name);
if (ret < 0)
goto out;
- newframe = copy_frame (frame);
- if (!newframe) {
- ret = -1;
- goto out;
+ ret = 0;
+
+ LOCK (&local->lock);
+ {
+ if (local->err != -2) {
+ newframe = copy_frame (frame);
+ if (!newframe) {
+ ret = -1;
+ }
+ } else
+ ret = -1;
}
+ UNLOCK (&local->lock);
- newframe->local = local;
+ if (ret == -1)
+ goto out;
+
+ newframe->local = mq_local_ref (local);
dict = dict_new ();
if (!dict) {
@@ -543,7 +642,7 @@ quota_readdir_cbk (call_frame_t *frame,
goto out;
STACK_WIND (newframe,
- get_child_contribution,
+ mq_get_child_contribution,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
&loc, dict);
@@ -552,6 +651,8 @@ quota_readdir_cbk (call_frame_t *frame,
loc_wipe (&loc);
+ newframe = NULL;
+
out:
if (dict) {
dict_unref (dict);
@@ -559,18 +660,12 @@ quota_readdir_cbk (call_frame_t *frame,
}
if (ret) {
- LOCK (&local->lock);
- {
- if (local->dentry_child_count == 0)
- local->err = -1;
- else
- local->err = -2;
- }
- UNLOCK (&local->lock);
+ val = -2;
+ mq_test_and_set_local_err (local, &val);
if (newframe) {
newframe->local = NULL;
-
+ mq_local_unref(this, local);
QUOTA_STACK_DESTROY (newframe, this);
}
@@ -578,22 +673,22 @@ quota_readdir_cbk (call_frame_t *frame,
}
}
- if (ret) {
- release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
- } else if (count == 0 ) {
- get_dirty_inode_size (frame, this);
+ if (ret && val != -2) {
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
+end:
+ mq_local_unref (this, local);
return 0;
}
int32_t
-quota_dirty_inode_readdir (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+mq_dirty_inode_readdir (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
quota_local_t *local = NULL;
@@ -601,7 +696,7 @@ quota_dirty_inode_readdir (call_frame_t *frame,
if (op_ret == -1) {
local->err = -1;
- release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -609,30 +704,29 @@ quota_dirty_inode_readdir (call_frame_t *frame,
local->fd = fd_ref (fd);
STACK_WIND (frame,
- quota_readdir_cbk,
+ mq_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- local->fd, READDIR_BUF, local->d_off);
+ local->fd, READDIR_BUF, local->d_off, xdata);
return 0;
}
int32_t
-check_if_still_dirty (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
+mq_check_if_still_dirty (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
int8_t dirty = -1;
int32_t ret = -1;
fd_t *fd = NULL;
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
local = frame->local;
@@ -642,8 +736,6 @@ check_if_still_dirty (call_frame_t *frame,
goto err;
}
- priv = this->private;
-
if (!dict) {
ret = -1;
goto err;
@@ -655,7 +747,7 @@ check_if_still_dirty (call_frame_t *frame,
//the inode is not dirty anymore
if (dirty == 0) {
- release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -664,18 +756,22 @@ check_if_still_dirty (call_frame_t *frame,
local->d_off = 0;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
STACK_WIND(frame,
- quota_dirty_inode_readdir,
+ mq_dirty_inode_readdir,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- &local->loc, fd);
+ &local->loc, fd, NULL);
ret = 0;
err:
if (op_ret == -1 || ret == -1) {
local->err = -1;
- release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
if (fd != NULL) {
@@ -686,21 +782,18 @@ err:
}
int32_t
-get_dirty_xattr (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+mq_get_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = -1;
dict_t *xattr_req = NULL;
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
if (op_ret == -1) {
- dirty_inode_updation_done (frame, NULL, this, 0, 0);
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
- priv = (marker_conf_t *) this->private;
-
local = frame->local;
xattr_req = dict_new ();
@@ -713,8 +806,13 @@ get_dirty_xattr (call_frame_t *frame, void *cookie,
if (ret)
goto err;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame,
- check_if_still_dirty,
+ mq_check_if_still_dirty,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
&local->loc,
@@ -724,7 +822,7 @@ get_dirty_xattr (call_frame_t *frame, void *cookie,
err:
if (ret) {
local->err = -1;
- release_lock_on_dirty_inode(frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode(frame, NULL, this, 0, 0, NULL);
}
if (xattr_req)
@@ -737,10 +835,10 @@ err:
* 0 other wise
*/
int32_t
-update_dirty_inode (xlator_t *this,
- loc_t *loc,
- quota_inode_ctx_t *ctx,
- inode_contribution_t *contribution)
+mq_update_dirty_inode (xlator_t *this,
+ loc_t *loc,
+ quota_inode_ctx_t *ctx,
+ inode_contribution_t *contribution)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -762,13 +860,12 @@ update_dirty_inode (xlator_t *this,
mq_assign_lk_owner (this, frame);
- local = quota_local_new ();
+ local = mq_local_new ();
if (local == NULL)
goto fr_destroy;
frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
+ ret = mq_loc_copy (&local->loc, loc);
if (ret < 0)
goto fr_destroy;
@@ -781,11 +878,18 @@ update_dirty_inode (xlator_t *this,
lock.l_start = 0;
lock.l_len = 0;
+ if (local->loc.inode == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Inode is NULL, so can't stackwind.");
+ goto fr_destroy;
+ }
+
STACK_WIND (frame,
- get_dirty_xattr,
+ mq_get_dirty_xattr,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->loc, F_SETLKW, &lock);
+ this->name, &local->loc, F_SETLKW, &lock, NULL);
return 1;
fr_destroy:
@@ -797,12 +901,20 @@ out:
int32_t
-quota_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+mq_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ quota_local_t *local = NULL;
+
if (frame == NULL)
return 0;
+ local = frame->local;
+
+ if (local != NULL) {
+ mq_initiate_quota_txn (this, &local->loc);
+ }
+
QUOTA_STACK_DESTROY (frame, this);
return 0;
@@ -810,9 +922,9 @@ quota_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
-quota_xattr_creation_release_lock (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+mq_xattr_creation_release_lock (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
struct gf_flock lock = {0, };
quota_local_t *local = NULL;
@@ -826,24 +938,24 @@ quota_xattr_creation_release_lock (call_frame_t *frame, void *cookie,
lock.l_pid = 0;
STACK_WIND (frame,
- quota_inode_creation_done,
+ mq_inode_creation_done,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, &local->loc,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
return 0;
}
int32_t
-create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+mq_create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
int32_t ret = -1;
dict_t *newdict = NULL;
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
if (op_ret < 0) {
goto err;
@@ -851,8 +963,6 @@ create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- priv = (marker_conf_t *) this->private;
-
if (local->loc.inode->ia_type == IA_IFDIR) {
newdict = dict_new ();
if (!newdict) {
@@ -864,19 +974,22 @@ create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
- STACK_WIND (frame, quota_xattr_creation_release_lock,
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_xattr_creation_release_lock,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- &local->loc, newdict, 0);
+ &local->loc, newdict, 0, NULL);
} else {
- quota_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
}
ret = 0;
err:
if (ret < 0) {
- quota_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict != NULL)
@@ -887,7 +1000,7 @@ err:
int32_t
-quota_create_xattr (xlator_t *this, call_frame_t *frame)
+mq_create_xattr (xlator_t *this, call_frame_t *frame)
{
int32_t ret = 0;
int64_t *value = NULL;
@@ -895,7 +1008,6 @@ quota_create_xattr (xlator_t *this, call_frame_t *frame)
dict_t *dict = NULL;
char key[512] = {0, };
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
quota_inode_ctx_t *ctx = NULL;
inode_contribution_t *contri = NULL;
@@ -904,14 +1016,12 @@ quota_create_xattr (xlator_t *this, call_frame_t *frame)
local = frame->local;
- priv = (marker_conf_t *) this->private;
-
- ret = quota_inode_ctx_get (local->loc.inode, this, &ctx);
+ ret = mq_inode_ctx_get (local->loc.inode, this, &ctx);
if (ret < 0) {
- ctx = quota_inode_ctx_new (local->loc.inode, this);
+ ctx = mq_inode_ctx_new (local->loc.inode, this);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota_inode_ctx_new failed");
+ "mq_inode_ctx_new failed");
ret = -1;
goto out;
}
@@ -929,7 +1039,7 @@ quota_create_xattr (xlator_t *this, call_frame_t *frame)
}
if (strcmp (local->loc.path, "/") != 0) {
- contri = add_new_contribution_node (this, ctx, &local->loc);
+ contri = mq_add_new_contribution_node (this, ctx, &local->loc);
if (contri == NULL)
goto err;
@@ -941,9 +1051,11 @@ quota_create_xattr (xlator_t *this, call_frame_t *frame)
goto free_value;
}
- STACK_WIND (frame, create_dirty_xattr, FIRST_CHILD(this),
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_create_dirty_xattr, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, &local->loc,
- GF_XATTROP_ADD_ARRAY64, dict);
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
ret = 0;
free_size:
@@ -961,7 +1073,7 @@ err:
out:
if (ret < 0) {
- quota_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
}
return 0;
@@ -969,15 +1081,14 @@ out:
int32_t
-quota_check_n_set_inode_xattr (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *dict,
- struct iatt *postparent)
+mq_check_n_set_inode_xattr (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *dict,
+ struct iatt *postparent)
{
quota_local_t *local = NULL;
int64_t *size = NULL, *contri = NULL;
int8_t dirty = 0;
- marker_conf_t *priv = NULL;
int32_t ret = 0;
char contri_key[512] = {0, };
@@ -986,7 +1097,6 @@ quota_check_n_set_inode_xattr (call_frame_t *frame, void *cookie,
}
local = frame->local;
- priv = this->private;
ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
if (ret < 0)
@@ -1008,18 +1118,22 @@ quota_check_n_set_inode_xattr (call_frame_t *frame, void *cookie,
}
out:
- quota_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
return 0;
create_xattr:
- quota_create_xattr (this, frame);
+ if (uuid_is_null (local->loc.gfid)) {
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+ }
+
+ mq_create_xattr (this, frame);
return 0;
}
int32_t
-quota_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+mq_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
dict_t *xattr_req = NULL;
quota_local_t *local = NULL;
@@ -1036,13 +1150,18 @@ quota_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
- ret = quota_req_xattr (this, &local->loc, xattr_req);
+ ret = mq_req_xattr (this, &local->loc, xattr_req);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING, "cannot request xattr");
goto err;
}
- STACK_WIND (frame, quota_check_n_set_inode_xattr, FIRST_CHILD(this),
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_check_n_set_inode_xattr, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, &local->loc, xattr_req);
dict_unref (xattr_req);
@@ -1050,20 +1169,20 @@ quota_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- quota_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
if (xattr_req)
dict_unref (xattr_req);
return 0;
lock_err:
- quota_inode_creation_done (frame, NULL, this, 0, 0);
+ mq_inode_creation_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
int32_t
-quota_set_inode_xattr (xlator_t *this, loc_t *loc)
+mq_set_inode_xattr (xlator_t *this, loc_t *loc)
{
struct gf_flock lock = {0, };
quota_local_t *local = NULL;
@@ -1076,7 +1195,7 @@ quota_set_inode_xattr (xlator_t *this, loc_t *loc)
goto err;
}
- local = quota_local_new ();
+ local = mq_local_new ();
if (local == NULL) {
goto err;
}
@@ -1096,10 +1215,10 @@ quota_set_inode_xattr (xlator_t *this, loc_t *loc)
lock.l_whence = SEEK_SET;
STACK_WIND (frame,
- quota_get_xattr,
+ mq_get_xattr,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->loc, F_SETLKW, &lock);
+ this->name, &local->loc, F_SETLKW, &lock, NULL);
return 0;
@@ -1111,38 +1230,68 @@ err:
int32_t
-get_parent_inode_local (xlator_t *this, quota_local_t *local)
+mq_get_parent_inode_local (xlator_t *this, quota_local_t *local)
{
- int32_t ret;
+ int32_t ret = -1;
quota_inode_ctx_t *ctx = NULL;
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", local, out);
+
+ local->contri = NULL;
+
loc_wipe (&local->loc);
- loc_copy (&local->loc, &local->parent_loc);
+ ret = mq_loc_copy (&local->loc, &local->parent_loc);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "loc copy failed");
+ goto out;
+ }
loc_wipe (&local->parent_loc);
- quota_inode_loc_fill (NULL, local->loc.parent, &local->parent_loc);
+ ret = mq_inode_loc_fill (NULL, local->loc.parent,
+ &local->parent_loc);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "failed to build parent loc of %s",
+ local->loc.path);
+ goto out;
+ }
- ret = quota_inode_ctx_get (local->loc.inode, this, &ctx);
- if (ret < 0)
- return -1;
+ ret = mq_inode_ctx_get (local->loc.inode, this, &ctx);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "inode ctx get failed");
+ goto out;
+ }
local->ctx = ctx;
+ if (list_empty (&ctx->contribution_head)) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "contribution node list is empty which "
+ "is an error");
+ ret = -1;
+ goto out;
+ }
+
local->contri = (inode_contribution_t *) ctx->contribution_head.next;
- return 0;
+ ret = 0;
+out:
+ return ret;
}
int32_t
-xattr_updation_done (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+mq_xattr_updation_done (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
QUOTA_STACK_DESTROY (frame, this);
return 0;
@@ -1150,8 +1299,8 @@ xattr_updation_done (call_frame_t *frame,
int32_t
-quota_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+mq_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
gf_boolean_t status = _gf_false;
@@ -1165,7 +1314,7 @@ quota_inodelk_cbk (call_frame_t *frame, void *cookie,
"unlocking failed on path (%s)(%s)",
local->parent_loc.path, strerror (op_errno));
}
- xattr_updation_done (frame, NULL, this, 0, 0, NULL);
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL);
return 0;
}
@@ -1175,20 +1324,22 @@ quota_inodelk_cbk (call_frame_t *frame, void *cookie,
if ((strcmp (local->parent_loc.path, "/") == 0)
|| (local->delta == 0)) {
- xattr_updation_done (frame, NULL, this, 0, 0, NULL);
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL);
} else {
- ret = get_parent_inode_local (this, local);
+ ret = mq_get_parent_inode_local (this, local);
if (ret < 0) {
- xattr_updation_done (frame, NULL, this, 0, 0, NULL);
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL,
+ NULL);
goto out;
}
status = _gf_true;
ret = mq_test_and_set_ctx_updation_status (local->ctx, &status);
if (ret == 0 && status == _gf_false) {
- get_lock_on_parent (frame, this);
+ mq_get_lock_on_parent (frame, this);
} else {
- xattr_updation_done (frame, NULL, this, 0, 0, NULL);
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL,
+ NULL);
}
}
out:
@@ -1198,9 +1349,9 @@ out:
//now release lock on the parent inode
int32_t
-quota_release_parent_lock (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+mq_release_parent_lock (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
quota_local_t *local = NULL;
@@ -1218,7 +1369,7 @@ quota_release_parent_lock (call_frame_t *frame, void *cookie,
strerror (local->err));
}
- ret = quota_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
if (ret < 0)
goto wind;
@@ -1228,6 +1379,12 @@ quota_release_parent_lock (call_frame_t *frame, void *cookie,
}
UNLOCK (&ctx->lock);
+ if (local->parent_loc.inode == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Invalid parent inode.");
+ goto err;
+ }
+
wind:
lock.l_type = F_UNLCK;
lock.l_whence = SEEK_SET;
@@ -1236,30 +1393,33 @@ wind:
lock.l_pid = 0;
STACK_WIND (frame,
- quota_inodelk_cbk,
+ mq_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, &local->parent_loc,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
return 0;
+err:
+ mq_xattr_updation_done (frame, NULL, this,
+ 0, 0 , NULL, NULL);
+ return 0;
}
int32_t
-quota_mark_undirty (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+mq_mark_undirty (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
int32_t ret = -1;
int64_t *size = NULL;
dict_t *newdict = NULL;
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
- marker_conf_t *priv = NULL;
local = frame->local;
@@ -1271,11 +1431,9 @@ quota_mark_undirty (call_frame_t *frame,
goto err;
}
- priv = this->private;
-
//update the size of the parent inode
if (dict != NULL) {
- ret = quota_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
if (ret < 0) {
op_errno = EINVAL;
goto err;
@@ -1310,17 +1468,20 @@ quota_mark_undirty (call_frame_t *frame,
goto err;
}
- STACK_WIND (frame, quota_release_parent_lock,
+ uuid_copy (local->parent_loc.gfid, local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
+ STACK_WIND (frame, mq_release_parent_lock,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- &local->parent_loc, newdict, 0);
+ &local->parent_loc, newdict, 0, NULL);
ret = 0;
err:
if (op_ret == -1 || ret == -1) {
local->err = op_errno;
- quota_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -1331,17 +1492,16 @@ err:
int32_t
-quota_update_parent_size (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+mq_update_parent_size (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
int64_t *size = NULL;
int32_t ret = -1;
dict_t *newdict = NULL;
- marker_conf_t *priv = NULL;
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -1365,14 +1525,12 @@ quota_update_parent_size (call_frame_t *frame,
local->loc.path, local->ctx->size,
local->contri->contribution);
- priv = this->private;
-
if (dict == NULL) {
op_errno = EINVAL;
goto err;
}
- ret = quota_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
if (ret < 0) {
op_errno = EINVAL;
goto err;
@@ -1395,18 +1553,23 @@ quota_update_parent_size (call_frame_t *frame,
goto err;
}
+ if (uuid_is_null (local->parent_loc.gfid))
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
STACK_WIND (frame,
- quota_mark_undirty,
+ mq_mark_undirty,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
&local->parent_loc,
GF_XATTROP_ADD_ARRAY64,
- newdict);
+ newdict, NULL);
ret = 0;
err:
if (op_ret == -1 || ret < 0) {
local->err = op_errno;
- quota_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -1416,11 +1579,11 @@ err:
}
int32_t
-quota_update_inode_contribution (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict,
- struct iatt *postparent)
+mq_update_inode_contribution (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict,
+ struct iatt *postparent)
{
int32_t ret = -1;
int64_t *size = NULL, size_int = 0, contri_int = 0;
@@ -1430,7 +1593,6 @@ quota_update_inode_contribution (call_frame_t *frame, void *cookie,
dict_t *newdict = NULL;
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
- marker_conf_t *priv = NULL;
inode_contribution_t *contribution = NULL;
local = frame->local;
@@ -1443,8 +1605,6 @@ quota_update_inode_contribution (call_frame_t *frame, void *cookie,
goto err;
}
- priv = this->private;
-
ctx = local->ctx;
contribution = local->contri;
@@ -1497,7 +1657,7 @@ unlock:
local->delta = size_int - contri_int;
if (local->delta == 0) {
- quota_mark_undirty (frame, NULL, this, 0, 0, NULL);
+ mq_mark_undirty (frame, NULL, this, 0, 0, NULL, NULL);
return 0;
}
@@ -1519,20 +1679,25 @@ unlock:
goto err;
}
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame,
- quota_update_parent_size,
+ mq_update_parent_size,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
&local->loc,
GF_XATTROP_ADD_ARRAY64,
- newdict);
+ newdict, NULL);
ret = 0;
err:
if (op_ret == -1 || ret < 0) {
local->err = op_errno;
- quota_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -1542,15 +1707,14 @@ err:
}
int32_t
-quota_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+mq_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
int32_t ret = -1;
char contri_key [512] = {0, };
dict_t *newdict = NULL;
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
quota_inode_ctx_t *ctx = NULL;
local = frame->local;
@@ -1568,10 +1732,8 @@ quota_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
gf_log (this->name, GF_LOG_DEBUG, "%s marked dirty", local->parent_loc.path);
- priv = this->private;
-
//update parent ctx
- ret = quota_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
if (ret == -1) {
op_errno = EINVAL;
goto err;
@@ -1591,6 +1753,11 @@ quota_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
if (local->loc.inode->ia_type == IA_IFDIR) {
ret = dict_set_int64 (newdict, QUOTA_SIZE_KEY, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_set failed.");
+ goto err;
+ }
}
GET_CONTRI_KEY (contri_key, local->contri->gfid, ret);
@@ -1600,21 +1767,31 @@ quota_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
}
ret = dict_set_int64 (newdict, contri_key, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_set failed.");
+ goto err;
+ }
mq_set_ctx_updation_status (local->ctx, _gf_false);
- STACK_WIND (frame, quota_update_inode_contribution, FIRST_CHILD(this),
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_update_inode_contribution, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, &local->loc, newdict);
ret = 0;
err:
- if ((op_ret == -1) || (ret == -1)) {
+ if ((op_ret == -1) || (ret < 0)) {
local->err = op_errno;
mq_set_ctx_updation_status (local->ctx, _gf_false);
- quota_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -1624,13 +1801,12 @@ err:
}
int32_t
-quota_markdirty (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+mq_markdirty (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = -1;
dict_t *dict = NULL;
quota_local_t *local = NULL;
- marker_conf_t *priv = NULL;
local = frame->local;
@@ -1643,7 +1819,7 @@ quota_markdirty (call_frame_t *frame, void *cookie,
mq_set_ctx_updation_status (local->ctx, _gf_false);
- quota_inodelk_cbk (frame, NULL, this, 0, 0);
+ mq_inodelk_cbk (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -1651,8 +1827,6 @@ quota_markdirty (call_frame_t *frame, void *cookie,
gf_log (this->name, GF_LOG_TRACE,
"inodelk succeeded on %s", local->parent_loc.path);
- priv = this->private;
-
dict = dict_new ();
if (!dict) {
ret = -1;
@@ -1663,10 +1837,14 @@ quota_markdirty (call_frame_t *frame, void *cookie,
if (ret == -1)
goto err;
- STACK_WIND (frame, quota_fetch_child_size_and_contri,
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
+ STACK_WIND (frame, mq_fetch_child_size_and_contri,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- &local->parent_loc, dict, 0);
+ &local->parent_loc, dict, 0, NULL);
ret = 0;
err:
@@ -1675,7 +1853,7 @@ err:
mq_set_ctx_updation_status (local->ctx, _gf_false);
- quota_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (dict)
@@ -1686,7 +1864,7 @@ err:
int32_t
-get_lock_on_parent (call_frame_t *frame, xlator_t *this)
+mq_get_lock_on_parent (call_frame_t *frame, xlator_t *this)
{
struct gf_flock lock = {0, };
quota_local_t *local = NULL;
@@ -1697,16 +1875,23 @@ get_lock_on_parent (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG, "taking lock on %s",
local->parent_loc.path);
+ if (local->parent_loc.inode == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "parent inode is not valid, aborting "
+ "transaction.");
+ goto fr_destroy;
+ }
+
lock.l_len = 0;
lock.l_start = 0;
lock.l_type = F_WRLCK;
lock.l_whence = SEEK_SET;
STACK_WIND (frame,
- quota_markdirty,
+ mq_markdirty,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock);
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
return 0;
@@ -1718,9 +1903,9 @@ fr_destroy:
int
-start_quota_txn (xlator_t *this, loc_t *loc,
- quota_inode_ctx_t *ctx,
- inode_contribution_t *contri)
+mq_start_quota_txn (xlator_t *this, loc_t *loc,
+ quota_inode_ctx_t *ctx,
+ inode_contribution_t *contri)
{
int32_t ret = -1;
call_frame_t *frame = NULL;
@@ -1732,25 +1917,25 @@ start_quota_txn (xlator_t *this, loc_t *loc,
mq_assign_lk_owner (this, frame);
- local = quota_local_new ();
+ local = mq_local_new ();
if (local == NULL)
goto fr_destroy;
frame->local = local;
- ret = loc_copy (&local->loc, loc);
+ ret = mq_loc_copy (&local->loc, loc);
if (ret < 0)
goto fr_destroy;
- ret = quota_inode_loc_fill (NULL, local->loc.parent,
- &local->parent_loc);
+ ret = mq_inode_loc_fill (NULL, local->loc.parent,
+ &local->parent_loc);
if (ret < 0)
goto fr_destroy;
local->ctx = ctx;
local->contri = contri;
- ret = get_lock_on_parent (frame, this);
+ ret = mq_get_lock_on_parent (frame, this);
if (ret == -1)
goto err;
@@ -1766,16 +1951,18 @@ err:
int
-initiate_quota_txn (xlator_t *this, loc_t *loc)
+mq_initiate_quota_txn (xlator_t *this, loc_t *loc)
{
int32_t ret = -1;
gf_boolean_t status = _gf_false;
quota_inode_ctx_t *ctx = NULL;
inode_contribution_t *contribution = NULL;
- VALIDATE_OR_GOTO (loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
- ret = quota_inode_ctx_get (loc->inode, this, &ctx);
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"inode ctx get failed, aborting quota txn");
@@ -1783,7 +1970,7 @@ initiate_quota_txn (xlator_t *this, loc_t *loc)
goto out;
}
- contribution = get_contribution_node (loc->parent, ctx);
+ contribution = mq_get_contribution_node (loc->parent, ctx);
if (contribution == NULL)
goto out;
@@ -1797,7 +1984,7 @@ initiate_quota_txn (xlator_t *this, loc_t *loc)
goto out;
if (status == _gf_false) {
- start_quota_txn (this, loc, ctx, contribution);
+ mq_start_quota_txn (this, loc, ctx, contribution);
}
ret = 0;
@@ -1811,7 +1998,7 @@ out:
/* int64_t contribution) */
/* { */
/* if (size != contribution) { */
-/* initiate_quota_txn (this, loc); */
+/* mq_initiate_quota_txn (this, loc); */
/* } */
/* return 0; */
@@ -1819,31 +2006,39 @@ out:
int32_t
-inspect_directory_xattr (xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- struct iatt buf)
+mq_inspect_directory_xattr (xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ struct iatt buf)
{
int32_t ret = 0;
int8_t dirty = -1;
int64_t *size = NULL, size_int = 0;
int64_t *contri = NULL, contri_int = 0;
char contri_key [512] = {0, };
- marker_conf_t *priv = NULL;
gf_boolean_t not_root = _gf_false;
quota_inode_ctx_t *ctx = NULL;
inode_contribution_t *contribution = NULL;
- priv = this->private;
-
- ret = quota_inode_ctx_get (loc->inode, this, &ctx);
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
if (ret < 0) {
- ctx = quota_inode_ctx_new (loc->inode, this);
+ ctx = mq_inode_ctx_new (loc->inode, this);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota_inode_ctx_new failed");
+ "mq_inode_ctx_new failed");
ret = -1;
- goto out;
+ goto err;
+ }
+ }
+
+ if (strcmp (loc->path, "/") != 0) {
+ contribution = mq_add_new_contribution_node (this, ctx, loc);
+ if (contribution == NULL) {
+ if (!uuid_is_null (loc->inode->gfid))
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot add a new contribution node");
+ ret = -1;
+ goto err;
}
}
@@ -1858,13 +2053,6 @@ inspect_directory_xattr (xlator_t *this,
if (strcmp (loc->path, "/") != 0) {
not_root = _gf_true;
- contribution = add_new_contribution_node (this, ctx, loc);
- if (contribution == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot add a new contributio node");
- goto out;
- }
-
GET_CONTRI_KEY (contri_key, contribution->gfid, ret);
if (ret < 0)
goto out;
@@ -1893,50 +2081,47 @@ inspect_directory_xattr (xlator_t *this,
" contri=%"PRId64, size_int, contri_int);
if (dirty) {
- ret = update_dirty_inode (this, loc, ctx, contribution);
+ ret = mq_update_dirty_inode (this, loc, ctx, contribution);
}
if ((!dirty || ret == 0) && (not_root == _gf_true) &&
(size_int != contri_int)) {
- initiate_quota_txn (this, loc);
+ mq_initiate_quota_txn (this, loc);
}
ret = 0;
out:
if (ret)
- quota_set_inode_xattr (this, loc);
-
- return 0;
+ mq_set_inode_xattr (this, loc);
+err:
+ return ret;
}
int32_t
-inspect_file_xattr (xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- struct iatt buf)
+mq_inspect_file_xattr (xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ struct iatt buf)
{
int32_t ret = -1;
uint64_t contri_int = 0, size = 0;
int64_t *contri_ptr = NULL;
char contri_key [512] = {0, };
- marker_conf_t *priv = NULL;
quota_inode_ctx_t *ctx = NULL;
inode_contribution_t *contribution = NULL;
- priv = this->private;
-
- ret = quota_inode_ctx_get (loc->inode, this, &ctx);
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
if (ret < 0) {
- ctx = quota_inode_ctx_new (loc->inode, this);
+ ctx = mq_inode_ctx_new (loc->inode, this);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota_inode_ctx_new failed");
+ "mq_inode_ctx_new failed");
ret = -1;
goto out;
}
}
- contribution = add_new_contribution_node (this, ctx, loc);
+ contribution = mq_add_new_contribution_node (this, ctx, loc);
if (contribution == NULL)
goto out;
@@ -1968,10 +2153,10 @@ inspect_file_xattr (xlator_t *this,
"size=%"PRId64 " contri=%"PRId64, size, contri_int);
if (size != contri_int) {
- initiate_quota_txn (this, loc);
+ mq_initiate_quota_txn (this, loc);
}
} else
- initiate_quota_txn (this, loc);
+ mq_initiate_quota_txn (this, loc);
}
out:
@@ -1979,39 +2164,38 @@ out:
}
int32_t
-quota_xattr_state (xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- struct iatt buf)
+mq_xattr_state (xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ struct iatt buf)
{
if (buf.ia_type == IA_IFREG ||
buf.ia_type == IA_IFLNK) {
- inspect_file_xattr (this, loc, dict, buf);
+ mq_inspect_file_xattr (this, loc, dict, buf);
} else if (buf.ia_type == IA_IFDIR)
- inspect_directory_xattr (this, loc, dict, buf);
+ mq_inspect_directory_xattr (this, loc, dict, buf);
return 0;
}
int32_t
-quota_req_xattr (xlator_t *this,
- loc_t *loc,
- dict_t *dict)
+mq_req_xattr (xlator_t *this,
+ loc_t *loc,
+ dict_t *dict)
{
int32_t ret = -1;
- marker_conf_t *priv = NULL;
GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", loc, out);
GF_VALIDATE_OR_GOTO ("marker", dict, out);
- priv = this->private;
+ if (!loc)
+ goto set_size;
//if not "/" then request contribution
if (strcmp (loc->path, "/") == 0)
goto set_size;
- ret = dict_set_contribution (this, dict, loc);
+ ret = mq_dict_set_contribution (this, dict, loc);
if (ret == -1)
goto out;
@@ -2036,8 +2220,8 @@ out:
int32_t
-quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+mq_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
QUOTA_STACK_DESTROY (frame, this);
@@ -2045,8 +2229,8 @@ quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int32_t
-quota_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+_mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
char contri_key [512] = {0, };
@@ -2055,7 +2239,7 @@ quota_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
local = (quota_local_t *) frame->local;
if (op_ret == -1 || local->err == -1) {
- quota_removexattr_cbk (frame, NULL, this, -1, 0);
+ mq_removexattr_cbk (frame, NULL, this, -1, 0, NULL);
return 0;
}
@@ -2064,30 +2248,31 @@ quota_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->hl_count > 1) {
GET_CONTRI_KEY (contri_key, local->contri->gfid, ret);
- STACK_WIND (frame, quota_removexattr_cbk, FIRST_CHILD(this),
+ STACK_WIND (frame, mq_removexattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- &local->loc, contri_key);
+ &local->loc, contri_key, NULL);
ret = 0;
} else {
- quota_removexattr_cbk (frame, NULL, this, 0, 0);
+ mq_removexattr_cbk (frame, NULL, this, 0, 0, NULL);
}
if (strcmp (local->parent_loc.path, "/") != 0) {
- get_parent_inode_local (this, local);
+ ret = mq_get_parent_inode_local (this, local);
+ if (ret < 0)
+ goto out;
- start_quota_txn (this, &local->loc, local->ctx, local->contri);
+ mq_start_quota_txn (this, &local->loc, local->ctx, local->contri);
}
-
- /* TODO: free local in quota_local_unref only*/
- quota_local_unref (this, local);
- GF_FREE (local);
+out:
+ mq_local_unref (this, local);
return 0;
}
int32_t
mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
int32_t ret = -1;
struct gf_flock lock = {0, };
@@ -2099,7 +2284,7 @@ mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1)
local->err = -1;
- ret = quota_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
LOCK (&local->contri->lock);
{
@@ -2130,24 +2315,22 @@ mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
lock.l_pid = 0;
STACK_WIND (frame,
- quota_inode_remove_done,
+ _mq_inode_remove_done,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, &local->parent_loc,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
return 0;
}
int32_t
-mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = -1;
int64_t *size = NULL;
dict_t *dict = NULL;
- marker_conf_t *priv = NULL;
quota_local_t *local = NULL;
- inode_contribution_t *contribution = NULL;
local = frame->local;
if (op_ret == -1) {
@@ -2159,10 +2342,6 @@ mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie,
VALIDATE_OR_GOTO (local->contri, err);
- priv = this->private;
-
- contribution = local->contri;
-
dict = dict_new ();
if (dict == NULL) {
ret = -1;
@@ -2177,28 +2356,30 @@ mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie,
if (ret < 0)
goto err;
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
STACK_WIND (frame, mq_inode_remove_done, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, &local->parent_loc,
- GF_XATTROP_ADD_ARRAY64, dict);
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
dict_unref (dict);
return 0;
err:
local->err = 1;
- mq_inode_remove_done (frame, NULL, this, -1, 0, NULL);
+ mq_inode_remove_done (frame, NULL, this, -1, 0, NULL, NULL);
if (dict)
dict_unref (dict);
return 0;
}
int32_t
-reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
+mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
{
int32_t ret = -1;
struct gf_flock lock = {0,};
call_frame_t *frame = NULL;
- marker_conf_t *priv = NULL;
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
inode_contribution_t *contribution = NULL;
@@ -2206,17 +2387,15 @@ reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
GF_VALIDATE_OR_GOTO ("marker", this, out);
GF_VALIDATE_OR_GOTO ("marker", loc, out);
- priv = this->private;
-
- ret = quota_inode_ctx_get (loc->inode, this, &ctx);
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
if (ret < 0)
goto out;
- contribution = get_contribution_node (loc->parent, ctx);
+ contribution = mq_get_contribution_node (loc->parent, ctx);
if (contribution == NULL)
goto out;
- local = quota_local_new ();
+ local = mq_local_new ();
if (local == NULL) {
ret = -1;
goto out;
@@ -2237,14 +2416,14 @@ reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
goto out;
}
- ret = loc_copy (&local->loc, loc);
+ ret = mq_loc_copy (&local->loc, loc);
if (ret < 0)
goto out;
local->ctx = ctx;
local->contri = contribution;
- ret = quota_inode_loc_fill (NULL, loc->parent, &local->parent_loc);
+ ret = mq_inode_loc_fill (NULL, loc->parent, &local->parent_loc);
if (ret < 0)
goto out;
@@ -2263,19 +2442,24 @@ reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
lock.l_type = F_WRLCK;
lock.l_whence = SEEK_SET;
+ if (local->parent_loc.inode == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Inode is NULL, so can't stackwind.");
+ goto out;
+ }
+
STACK_WIND (frame,
mq_reduce_parent_size_xattr,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock);
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
local = NULL;
ret = 0;
out:
- if (local != NULL) {
- quota_local_unref (this, local);
- GF_FREE (local);
- }
+ if (local != NULL)
+ mq_local_unref (this, local);
return ret;
}
@@ -2289,7 +2473,7 @@ init_quota_priv (xlator_t *this)
int32_t
-quota_rename_update_newpath (xlator_t *this, loc_t *loc)
+mq_rename_update_newpath (xlator_t *this, loc_t *loc)
{
int32_t ret = -1;
quota_inode_ctx_t *ctx = NULL;
@@ -2299,23 +2483,23 @@ quota_rename_update_newpath (xlator_t *this, loc_t *loc)
GF_VALIDATE_OR_GOTO ("marker", loc, out);
GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
- ret = quota_inode_ctx_get (loc->inode, this, &ctx);
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
if (ret < 0)
goto out;
- contribution = add_new_contribution_node (this, ctx, loc);
+ contribution = mq_add_new_contribution_node (this, ctx, loc);
if (contribution == NULL) {
ret = -1;
goto out;
}
- initiate_quota_txn (this, loc);
+ mq_initiate_quota_txn (this, loc);
out:
return ret;
}
int32_t
-quota_forget (xlator_t *this, quota_inode_ctx_t *ctx)
+mq_forget (xlator_t *this, quota_inode_ctx_t *ctx)
{
inode_contribution_t *contri = NULL;
inode_contribution_t *next = NULL;
diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h
index 70524ab07..385760ac4 100644
--- a/xlators/features/marker/src/marker-quota.h
+++ b/xlators/features/marker/src/marker-quota.h
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _MARKER_QUOTA_H
#define _MARKER_QUOTA_H
@@ -24,7 +15,6 @@
#include "config.h"
#endif
-#include "marker.h"
#include "xlator.h"
#include "marker-mem-types.h"
@@ -42,8 +32,7 @@
_local = _frame->local; \
_frame->local = NULL; \
STACK_DESTROY (_frame->root); \
- quota_local_unref (_this, _local); \
- GF_FREE (_local); \
+ mq_local_unref (_this, _local); \
} while (0)
@@ -105,59 +94,37 @@ struct inode_contribution {
};
typedef struct inode_contribution inode_contribution_t;
-struct quota_local {
- int64_t delta;
- int64_t d_off;
- int32_t err;
- int32_t ref;
- int64_t sum;
- int64_t size;
- int32_t hl_count;
- int32_t dentry_child_count;
-
- fd_t *fd;
- call_frame_t *frame;
- gf_lock_t lock;
-
- loc_t loc;
- loc_t parent_loc;
-
- quota_inode_ctx_t *ctx;
- inode_contribution_t *contri;
-};
-typedef struct quota_local quota_local_t;
-
int32_t
-get_lock_on_parent (call_frame_t *, xlator_t *);
+mq_get_lock_on_parent (call_frame_t *, xlator_t *);
int32_t
-quota_req_xattr (xlator_t *, loc_t *, dict_t *);
+mq_req_xattr (xlator_t *, loc_t *, dict_t *);
int32_t
init_quota_priv (xlator_t *);
int32_t
-quota_xattr_state (xlator_t *, loc_t *, dict_t *, struct iatt);
+mq_xattr_state (xlator_t *, loc_t *, dict_t *, struct iatt);
int32_t
-quota_set_inode_xattr (xlator_t *, loc_t *);
+mq_set_inode_xattr (xlator_t *, loc_t *);
int
-initiate_quota_txn (xlator_t *, loc_t *);
+mq_initiate_quota_txn (xlator_t *, loc_t *);
int32_t
-quota_dirty_inode_readdir (call_frame_t *, void *, xlator_t *,
- int32_t, int32_t, fd_t *);
+mq_dirty_inode_readdir (call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, fd_t *, dict_t *);
int32_t
-reduce_parent_size (xlator_t *, loc_t *, int64_t);
+mq_reduce_parent_size (xlator_t *, loc_t *, int64_t);
int32_t
-quota_rename_update_newpath (xlator_t *, loc_t *);
+mq_rename_update_newpath (xlator_t *, loc_t *);
int32_t
-inspect_file_xattr (xlator_t *this, loc_t *loc, dict_t *dict, struct iatt buf);
+mq_inspect_file_xattr (xlator_t *this, loc_t *loc, dict_t *dict, struct iatt buf);
int32_t
-quota_forget (xlator_t *, quota_inode_ctx_t *);
+mq_forget (xlator_t *, quota_inode_ctx_t *);
#endif
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
index 33e0a477d..6a2c85691 100644
--- a/xlators/features/marker/src/marker.c
+++ b/xlators/features/marker/src/marker.c
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -31,6 +22,8 @@
#include "marker-common.h"
#include "byte-order.h"
+#define _GF_UID_GID_CHANGED 1
+
void
fini (xlator_t *this);
@@ -63,23 +56,25 @@ marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
if (inode) {
loc->inode = inode_ref (inode);
- loc->ino = inode->ino;
+ if (uuid_is_null (loc->gfid)) {
+ uuid_copy (loc->gfid, loc->inode->gfid);
+ }
}
if (parent)
loc->parent = inode_ref (parent);
- loc->path = gf_strdup (path);
- if (!loc->path) {
- gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
- goto loc_wipe;
- }
+ if (path) {
+ loc->path = gf_strdup (path);
+ if (!loc->path) {
+ gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
+ goto loc_wipe;
+ }
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else
- goto loc_wipe;
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
ret = 0;
loc_wipe:
@@ -93,23 +88,14 @@ int
marker_inode_loc_fill (inode_t *inode, loc_t *loc)
{
char *resolvedpath = NULL;
- inode_t *parent = NULL;
int ret = -1;
+ inode_t *parent = NULL;
if ((!inode) || (!loc))
return ret;
- if ((inode) && (inode->ino == 1)) {
- loc->parent = NULL;
- goto ignore_parent;
- }
-
- parent = inode_parent (inode, 0, NULL);
- if (!parent) {
- goto err;
- }
+ parent = inode_parent (inode, NULL, NULL);
-ignore_parent:
ret = inode_path (inode, NULL, &resolvedpath);
if (ret < 0)
goto err;
@@ -119,11 +105,10 @@ ignore_parent:
goto err;
err:
- if (parent)
- inode_unref (parent);
+ if (parent)
+ inode_unref (parent);
- if (resolvedpath)
- GF_FREE (resolvedpath);
+ GF_FREE (resolvedpath);
return ret;
}
@@ -133,8 +118,17 @@ marker_trav_parent (marker_local_t *local)
{
int32_t ret = 0;
loc_t loc = {0, };
+ inode_t *parent = NULL;
+ int8_t need_unref = 0;
+
+ if (!local->loc.parent) {
+ parent = inode_parent (local->loc.inode, NULL, NULL);
+ if (parent)
+ need_unref = 1;
+ } else
+ parent = local->loc.parent;
- ret = marker_inode_loc_fill (local->loc.parent, &loc);
+ ret = marker_inode_loc_fill (parent, &loc);
if (ret < 0) {
ret = -1;
@@ -145,16 +139,28 @@ marker_trav_parent (marker_local_t *local)
local->loc = loc;
out:
+ if (need_unref)
+ inode_unref (parent);
+
return ret;
}
int32_t
-marker_error_handler (xlator_t *this)
+marker_error_handler (xlator_t *this, marker_local_t *local, int32_t op_errno)
{
- marker_conf_t *priv = NULL;
+ marker_conf_t *priv = NULL;
+ const char *path = NULL;
- priv = (marker_conf_t *) this->private;
+ priv = (marker_conf_t *) this->private;
+ path = local
+ ? (local->loc.path
+ ? local->loc.path : uuid_utoa(local->loc.gfid))
+ : "<nul>";
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Indexing gone corrupt at %s (reason: %s)."
+ " Geo-replication slave content needs to be revalidated",
+ path, strerror (op_errno));
unlink (priv->timestamp_file);
return 0;
@@ -179,12 +185,14 @@ marker_local_unref (marker_local_t *local)
loc_wipe (&local->loc);
loc_wipe (&local->parent_loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
if (local->oplocal) {
marker_local_unref (local->oplocal);
local->oplocal = NULL;
}
- GF_FREE (local);
+ mem_put (local);
out:
return 0;
}
@@ -219,13 +227,14 @@ stat_stampfile (xlator_t *this, marker_conf_t *priv,
int32_t
marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this,
- const char *name, struct volume_mark *vol_mark)
+ const char *name, struct volume_mark *vol_mark,
+ dict_t *xdata)
{
int32_t ret = -1;
dict_t *dict = NULL;
if (vol_mark == NULL){
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
goto out;
}
@@ -234,8 +243,11 @@ marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this,
ret = dict_set_bin (dict, (char *)name, vol_mark,
sizeof (struct volume_mark));
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "failed to set key %s",
+ name);
- STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict, xdata);
dict_unref (dict);
out:
@@ -251,7 +263,7 @@ call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name)
priv = (marker_conf_t *)this->private;
- if (frame->root->pid != -1 || name == NULL ||
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL ||
strcmp (name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) {
ret = _gf_false;
goto out;
@@ -259,28 +271,31 @@ call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name)
stat_stampfile (this, priv, &vol_mark);
- marker_getxattr_stampfile_cbk (frame, this, name, vol_mark);
+ marker_getxattr_stampfile_cbk (frame, this, name, vol_mark, NULL);
out:
return ret;
}
int32_t
marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
if (cookie) {
gf_log (this->name, GF_LOG_DEBUG,
"Filtering the quota extended attributes");
- dict_foreach (dict, marker_filter_quota_xattr, NULL);
+ dict_foreach_fnmatch (dict, "trusted.glusterfs.quota*",
+ marker_filter_quota_xattr, NULL);
}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int32_t
marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
gf_boolean_t ret = _gf_false;
marker_conf_t *priv = NULL;
@@ -307,7 +322,7 @@ wind:
STACK_WIND_COOKIE (frame, marker_getxattr_cbk, (void *)cookie,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr, loc,
- name);
+ name, xdata);
}
return 0;
@@ -332,7 +347,7 @@ marker_setxattr_done (call_frame_t *frame)
int
marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
int32_t done = 0;
@@ -341,20 +356,26 @@ marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = (marker_local_t*) frame->local;
if (op_ret == -1 && op_errno == ENOSPC) {
- marker_error_handler (this);
+ marker_error_handler (this, local, op_errno);
done = 1;
goto out;
}
- if (strcmp (local->loc.path, "/") == 0) {
- done = 1;
- goto out;
+ if (local) {
+ if (local->loc.path && strcmp (local->loc.path, "/") == 0) {
+ done = 1;
+ goto out;
+ }
+ if (__is_root_gfid (local->loc.gfid)) {
+ done = 1;
+ goto out;
+ }
}
ret = marker_trav_parent (local);
if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "Error occured "
+ gf_log (this->name, GF_LOG_DEBUG, "Error occurred "
"while traversing to the parent, stopping marker");
done = 1;
@@ -375,7 +396,7 @@ out:
int32_t
marker_start_setxattr (call_frame_t *frame, xlator_t *this)
{
- int32_t ret = 0;
+ int32_t ret = -1;
dict_t *dict = NULL;
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -384,19 +405,37 @@ marker_start_setxattr (call_frame_t *frame, xlator_t *this)
local = (marker_local_t*) frame->local;
+ if (!local)
+ goto out;
+
dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (local->loc.inode && uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
ret = dict_set_static_bin (dict, priv->marker_xattr,
(void *)local->timebuf, 8);
-
- gf_log (this->name, GF_LOG_DEBUG, "path = %s", local->loc.path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set marker xattr (%s)", local->loc.path);
+ goto out;
+ }
STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0);
+ FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0,
+ NULL);
- dict_unref (dict);
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
- return 0;
+ return ret;
}
void
@@ -429,12 +468,24 @@ marker_create_frame (xlator_t *this, marker_local_t *local)
int32_t
marker_xtime_update_marks (xlator_t *this, marker_local_t *local)
{
+ marker_conf_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ priv = this->private;
+
+ if ((local->pid == GF_CLIENT_PID_GSYNCD
+ && !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE))
+ || (local->pid == GF_CLIENT_PID_DEFRAG))
+ goto out;
+
marker_gettimeofday (local);
marker_local_ref (local);
marker_create_frame (this, local);
-
+out:
return 0;
}
@@ -443,7 +494,7 @@ int32_t
marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -458,15 +509,18 @@ marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
- buf, preparent, postparent);
+ buf, preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
- quota_set_inode_xattr (this, &local->loc);
+ mq_set_inode_xattr (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -479,7 +533,7 @@ out:
int
marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -490,7 +544,7 @@ marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -500,12 +554,12 @@ marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto err;
wind:
STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, params);
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
return 0;
err:
STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
@@ -514,7 +568,7 @@ int32_t
marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -529,15 +583,18 @@ marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
- inspect_file_xattr (this, &local->loc, NULL, *buf);
+ mq_set_inode_xattr (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -550,7 +607,7 @@ out:
int32_t
marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd, dict_t *params)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -561,7 +618,7 @@ marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -571,12 +628,12 @@ marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
wind:
STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd,
- params);
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
return 0;
err:
STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
return 0;
}
@@ -585,7 +642,7 @@ err:
int32_t
marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -599,7 +656,8 @@ marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -607,7 +665,7 @@ marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
- initiate_quota_txn (this, &local->loc);
+ mq_initiate_quota_txn (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -624,8 +682,8 @@ marker_writev (call_frame_t *frame,
fd_t *fd,
struct iovec *vector,
int32_t count,
- off_t offset,
- struct iobref *iobref)
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -636,7 +694,7 @@ marker_writev (call_frame_t *frame,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -647,10 +705,10 @@ marker_writev (call_frame_t *frame,
wind:
STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- iobref);
+ flags, iobref, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -659,7 +717,7 @@ err:
int32_t
marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -674,7 +732,7 @@ marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
- postparent);
+ postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -682,7 +740,7 @@ marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
- reduce_parent_size (this, &local->loc, -1);
+ mq_reduce_parent_size (this, &local->loc, -1);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -693,7 +751,8 @@ out:
}
int32_t
-marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -704,7 +763,7 @@ marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -714,10 +773,10 @@ marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
goto err;
wind:
STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, flags);
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -726,7 +785,7 @@ err:
int32_t
marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -741,7 +800,7 @@ marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
- postparent);
+ postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -749,7 +808,7 @@ marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if ((priv->feature_enabled & GF_QUOTA) && (local->ia_nlink == 1))
- reduce_parent_size (this, &local->loc, -1);
+ mq_reduce_parent_size (this, &local->loc, -1);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -762,33 +821,38 @@ out:
int32_t
marker_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
marker_local_t *local = NULL;
+ local = frame->local;
if (op_ret < 0) {
goto err;
}
- local = frame->local;
if (local == NULL) {
+ op_errno = EINVAL;
goto err;
}
local->ia_nlink = buf->ia_nlink;
STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc);
+ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
+ local->xdata);
return 0;
err:
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL);
-
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ marker_local_unref (local);
return 0;
}
int32_t
-marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -799,8 +863,10 @@ marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (priv->feature_enabled == 0)
goto unlink_wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
-
+ local = mem_get0 (this->local_pool);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
MARKER_INIT_LOCAL (frame, local);
ret = loc_copy (&local->loc, loc);
@@ -808,17 +874,21 @@ marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (ret == -1)
goto err;
+ if (uuid_is_null (loc->gfid) && loc->inode)
+ uuid_copy (loc->gfid, loc->inode->gfid);
+
STACK_WIND (frame, marker_unlink_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
return 0;
unlink_wind:
STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL);
-
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ marker_local_unref (local);
return 0;
}
@@ -827,13 +897,13 @@ int32_t
marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occured while "
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
"linking a file ", strerror (op_errno));
}
@@ -842,7 +912,7 @@ marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -850,7 +920,7 @@ marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
- initiate_quota_txn (this, &local->loc);
+ mq_initiate_quota_txn (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -861,7 +931,8 @@ out:
}
int32_t
-marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -872,7 +943,7 @@ marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -882,10 +953,11 @@ marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
goto err;
wind:
STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -893,7 +965,7 @@ err:
int32_t
marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
loc_t newloc = {0, };
@@ -912,9 +984,8 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
}
gf_log (this->name, GF_LOG_WARNING,
- "inodelk (UNLOCK) failed on path:%s, inode (ino:%"PRId64
- ", gfid:%s)(%s)", local->parent_loc.path,
- local->parent_loc.inode->ino,
+ "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)",
+ local->parent_loc.path,
uuid_utoa (local->parent_loc.inode->gfid),
strerror (op_errno));
}
@@ -924,13 +995,13 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
local->stub = NULL;
} else if (local->err != 0) {
STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
}
- reduce_parent_size (this, &oplocal->loc, oplocal->contribution);
+ mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution);
if (local->loc.inode != NULL) {
- reduce_parent_size (this, &local->loc, local->contribution);
+ mq_reduce_parent_size (this, &local->loc, local->contribution);
}
newloc.inode = inode_ref (oplocal->loc.inode);
@@ -939,14 +1010,14 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
if (newloc.name)
newloc.name++;
newloc.parent = inode_ref (local->loc.parent);
- newloc.ino = oplocal->loc.inode->ino;
- quota_rename_update_newpath (this, &newloc);
+ mq_rename_update_newpath (this, &newloc);
loc_wipe (&newloc);
if (priv->feature_enabled & GF_XTIME) {
//update marks on oldpath
+ uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
marker_xtime_update_marks (this, oplocal);
marker_xtime_update_marks (this, local);
}
@@ -960,7 +1031,7 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
struct gf_flock lock = {0, };
@@ -974,15 +1045,14 @@ marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
}
gf_log (this->name, GF_LOG_WARNING,
- "inodelk (UNLOCK) failed on path:%s, inode (ino:%"PRId64
- ", gfid:%s)(%s)", oplocal->parent_loc.path,
- oplocal->parent_loc.inode->ino,
+ "inodelk (UNLOCK) failed on %s (gfid:%s) (%s)",
+ oplocal->parent_loc.path,
uuid_utoa (oplocal->parent_loc.inode->gfid),
strerror (op_errno));
}
if (local->next_lock_on == NULL) {
- marker_rename_done (frame, NULL, this, 0, 0);
+ marker_rename_done (frame, NULL, this, 0, 0, NULL);
goto out;
}
@@ -996,7 +1066,7 @@ marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
marker_rename_done,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock);
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
out:
return 0;
@@ -1006,7 +1076,7 @@ out:
int32_t
marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
struct gf_flock lock = {0, };
@@ -1018,8 +1088,9 @@ marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie,
local->err = op_errno;
}
- //Reset frame uid and gid if reset.
- MARKER_SET_UID_GID (frame->root, local);
+ //Reset frame uid and gid if set.
+ if (cookie == (void *) _GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID (frame, frame->root, local);
lock.l_type = F_UNLCK;
lock.l_whence = SEEK_SET;
@@ -1031,7 +1102,7 @@ marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie,
marker_rename_release_newp_lock,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &oplocal->parent_loc, F_SETLKW, &lock);
+ this->name, &oplocal->parent_loc, F_SETLKW, &lock, NULL);
return 0;
}
@@ -1040,7 +1111,8 @@ int32_t
marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -1048,6 +1120,7 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_stub_t *stub = NULL;
int32_t ret = 0;
char contri_key [512] = {0, };
+ loc_t newloc = {0, };
local = (marker_local_t *) frame->local;
@@ -1062,7 +1135,7 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->err = op_errno;
}
- gf_log (this->name, GF_LOG_TRACE, "%s occured while "
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
"renaming a file ", strerror (op_errno));
}
@@ -1074,7 +1147,7 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stub = fop_rename_cbk_stub (frame, default_rename_cbk, op_ret,
op_errno, buf, preoldparent,
postoldparent, prenewparent,
- postnewparent);
+ postnewparent, xdata);
if (stub == NULL) {
local->err = ENOMEM;
goto quota_err;
@@ -1091,20 +1164,28 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Removexattr requires uid and gid to be 0,
* reset them in the callback.
*/
- MARKER_SET_UID_GID (local, frame->root);
- frame->root->uid = 0;
- frame->root->gid = 0;
-
- STACK_WIND (frame, marker_rename_release_oldp_lock,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, &local->loc,
- contri_key);
+ MARKER_SET_UID_GID (frame, local, frame->root);
+
+ newloc.inode = inode_ref (oplocal->loc.inode);
+ newloc.path = gf_strdup (local->loc.path);
+ newloc.name = strrchr (newloc.path, '/');
+ if (newloc.name)
+ newloc.name++;
+ newloc.parent = inode_ref (local->loc.parent);
+ uuid_copy (newloc.gfid, oplocal->loc.inode->gfid);
+
+ STACK_WIND_COOKIE (frame, marker_rename_release_oldp_lock,
+ frame->cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ &newloc, contri_key, NULL);
+
+ loc_wipe (&newloc);
} else {
frame->local = NULL;
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent, prenewparent,
- postnewparent);
+ postnewparent, xdata);
if ((op_ret < 0) || (local == NULL)) {
goto out;
@@ -1112,6 +1193,7 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (priv->feature_enabled & GF_XTIME) {
//update marks on oldpath
+ uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
marker_xtime_update_marks (this, oplocal);
marker_xtime_update_marks (this, local);
}
@@ -1126,15 +1208,14 @@ out:
return 0;
quota_err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
return 0;
}
int32_t
marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
@@ -1144,12 +1225,15 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
oplocal = local->oplocal;
+ //Reset frame uid and gid if set.
+ if (cookie == (void *) _GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID (frame, frame->root, local);
+
if ((op_ret < 0) && (op_errno != ENOATTR)) {
local->err = op_errno;
gf_log (this->name, GF_LOG_WARNING,
- "fetching contribution values from %s (ino:%"PRId64", "
- "gfid:%s) failed (%s)", local->loc.path,
- local->loc.inode->ino,
+ "fetching contribution values from %s (gfid:%s) "
+ "failed (%s)", local->loc.path,
uuid_utoa (local->loc.inode->gfid),
strerror (op_errno));
goto err;
@@ -1170,12 +1254,12 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename, &oplocal->loc,
- &local->loc);
+ &local->loc, NULL);
return 0;
err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -1183,7 +1267,7 @@ err:
int32_t
marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict)
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
@@ -1193,12 +1277,15 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
local = frame->local;
oplocal = local->oplocal;
+ //Reset frame uid and gid if set.
+ if (cookie == (void *) _GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID (frame, frame->root, local);
+
if ((op_ret < 0) && (op_errno != ENOATTR)) {
local->err = op_errno;
gf_log (this->name, GF_LOG_WARNING,
- "fetching contribution values from %s (ino:%"PRId64", "
- "gfid:%s) failed (%s)", oplocal->loc.path,
- oplocal->loc.inode->ino,
+ "fetching contribution values from %s (gfid:%s) "
+ "failed (%s)", oplocal->loc.path,
uuid_utoa (oplocal->loc.inode->gfid),
strerror (op_errno));
goto err;
@@ -1220,17 +1307,26 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
goto err;
}
- STACK_WIND (frame, marker_do_rename,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, &local->loc,
- contri_key);
+ /* getxattr requires uid and gid to be 0,
+ * reset them in the callback.
+ */
+ MARKER_SET_UID_GID (frame, local, frame->root);
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND_COOKIE (frame, marker_do_rename,
+ frame->cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ &local->loc, contri_key, NULL);
} else {
- marker_do_rename (frame, NULL, this, 0, 0, NULL);
+ marker_do_rename (frame, NULL, this, 0, 0, NULL, NULL);
}
return 0;
err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -1238,7 +1334,7 @@ err:
int32_t
marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
@@ -1250,10 +1346,8 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
if (op_ret < 0) {
local->err = op_errno;
gf_log (this->name, GF_LOG_WARNING,
- "cannot hold inodelk on %s (ino:%"PRId64", gfid:%s)"
- "(%s)",
+ "cannot hold inodelk on %s (gfid:%s) (%s)",
local->next_lock_on->path,
- local->next_lock_on->inode->ino,
uuid_utoa (local->next_lock_on->inode->gfid),
strerror (op_errno));
goto lock_err;
@@ -1265,22 +1359,34 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
goto quota_err;
}
- STACK_WIND (frame, marker_get_newpath_contribution, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, &oplocal->loc,
- contri_key);
+ /* getxattr requires uid and gid to be 0,
+ * reset them in the callback.
+ */
+ MARKER_SET_UID_GID (frame, local, frame->root);
+
+ if (uuid_is_null (oplocal->loc.gfid))
+ uuid_copy (oplocal->loc.gfid,
+ oplocal->loc.inode->gfid);
+
+ GF_UUID_ASSERT (oplocal->loc.gfid);
+
+ STACK_WIND_COOKIE (frame, marker_get_newpath_contribution,
+ frame->cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ &oplocal->loc, contri_key, NULL);
return 0;
quota_err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
return 0;
lock_err:
if ((local->next_lock_on == NULL)
|| (local->next_lock_on == &local->parent_loc)) {
local->next_lock_on = NULL;
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
} else {
- marker_rename_release_newp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_newp_lock (frame, NULL, this, 0, 0, NULL);
}
return 0;
@@ -1289,7 +1395,7 @@ lock_err:
int32_t
marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
loc_t *loc = NULL;
@@ -1307,9 +1413,8 @@ marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->err = op_errno;
gf_log (this->name, GF_LOG_WARNING,
- "cannot hold inodelk on %s (ino:%"PRId64", gfid:%s)"
- "(%s)", loc->path, loc->inode->ino,
- uuid_utoa (loc->inode->gfid),
+ "cannot hold inodelk on %s (gfid:%s) (%s)",
+ loc->path, uuid_utoa (loc->inode->gfid),
strerror (op_errno));
goto err;
}
@@ -1325,22 +1430,22 @@ marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, local->next_lock_on,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
} else {
- marker_get_oldpath_contribution (frame, 0, this, 0, 0);
+ marker_get_oldpath_contribution (frame, 0, this, 0, 0, NULL);
}
return 0;
err:
- marker_rename_done (frame, NULL, this, 0, 0);
+ marker_rename_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
int32_t
marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1354,11 +1459,11 @@ marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
if (priv->feature_enabled == 0)
goto rename_wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
- ALLOCATE_OR_GOTO (oplocal, marker_local_t, err);
+ oplocal = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, oplocal);
@@ -1378,11 +1483,11 @@ marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
goto rename_wind;
}
- ret = quota_inode_loc_fill (NULL, newloc->parent, &local->parent_loc);
+ ret = mq_inode_loc_fill (NULL, newloc->parent, &local->parent_loc);
if (ret < 0)
goto err;
- ret = quota_inode_loc_fill (NULL, oldloc->parent, &oplocal->parent_loc);
+ ret = mq_inode_loc_fill (NULL, oldloc->parent, &oplocal->parent_loc);
if (ret < 0)
goto err;
@@ -1409,18 +1514,18 @@ marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, lock_on,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
return 0;
rename_wind:
STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
return 0;
err:
STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1429,13 +1534,13 @@ err:
int32_t
marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occured while "
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
"truncating a file ", strerror (op_errno));
}
@@ -1444,7 +1549,7 @@ marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
+ postbuf, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1452,7 +1557,7 @@ marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
- initiate_quota_txn (this, &local->loc);
+ mq_initiate_quota_txn (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -1464,7 +1569,8 @@ out:
}
int32_t
-marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1475,7 +1581,7 @@ marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1485,10 +1591,10 @@ marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
goto err;
wind:
STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1497,13 +1603,13 @@ err:
int32_t
marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occured while "
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
"truncating a file ", strerror (op_errno));
}
@@ -1512,7 +1618,7 @@ marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
+ postbuf, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1520,7 +1626,7 @@ marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
- initiate_quota_txn (this, &local->loc);
+ mq_initiate_quota_txn (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -1531,7 +1637,8 @@ out:
}
int32_t
-marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1542,7 +1649,7 @@ marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1552,10 +1659,10 @@ marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
goto err;
wind:
STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1565,13 +1672,13 @@ int32_t
marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occured while "
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
"creating symlinks ", strerror (op_errno));
}
@@ -1580,15 +1687,18 @@ marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
- inspect_file_xattr (this, &local->loc, NULL, *buf);
+ mq_set_inode_xattr (this, &local->loc);
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -1600,7 +1710,7 @@ out:
int
marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1611,7 +1721,7 @@ marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1621,11 +1731,12 @@ marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
goto err;
wind:
STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, params);
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
return 0;
err:
STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
@@ -1634,13 +1745,13 @@ int32_t
marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occured while "
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
"creating symlinks ", strerror (op_errno));
}
@@ -1649,15 +1760,18 @@ marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode,
- buf, preparent, postparent);
+ buf, preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
priv = this->private;
if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG (local->mode))) {
- inspect_file_xattr (this, &local->loc, NULL, *buf);
+ mq_set_inode_xattr (this, &local->loc);
}
if (priv->feature_enabled & GF_XTIME)
@@ -1670,7 +1784,7 @@ out:
int
marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *parms)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1681,7 +1795,7 @@ marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1693,11 +1807,216 @@ marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto err;
wind:
STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, parms);
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
return 0;
err:
STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "fallocating a file ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred during discard",
+ strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (discard, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred during zerofill",
+ strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+
return 0;
}
@@ -1728,7 +2047,7 @@ call_from_sp_client_to_reset_tmfile (call_frame_t *frame,
if (data == NULL)
return -1;
- if (frame->root->pid != -1) {
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
op_ret = -1;
op_errno = EPERM;
@@ -1757,7 +2076,7 @@ call_from_sp_client_to_reset_tmfile (call_frame_t *frame,
op_errno = EINVAL;
}
out:
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
return 0;
}
@@ -1765,21 +2084,21 @@ out:
int32_t
marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occured while "
- "creating symlinks ", strerror (op_errno));
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred in "
+ "setxattr ", strerror (op_errno));
}
local = (marker_local_t *) frame->local;
frame->local = NULL;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1796,7 +2115,7 @@ out:
int32_t
marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1811,7 +2130,7 @@ marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
if (ret == 0)
return 0;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1821,10 +2140,10 @@ marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
goto err;
wind:
STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags);
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1832,13 +2151,13 @@ err:
int32_t
marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occured while "
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
"creating symlinks ", strerror (op_errno));
}
@@ -1846,7 +2165,7 @@ marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1863,7 +2182,7 @@ out:
int32_t
marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1878,7 +2197,7 @@ marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
if (ret == 0)
return 0;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1888,10 +2207,10 @@ marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
goto err;
wind:
STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags);
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1900,13 +2219,13 @@ err:
int32_t
marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s occured while "
+ gf_log (this->name, GF_LOG_ERROR, "%s occurred while "
"creating symlinks ", strerror (op_errno));
}
@@ -1915,7 +2234,7 @@ marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1933,7 +2252,7 @@ out:
int32_t
marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1944,7 +2263,7 @@ marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1954,10 +2273,10 @@ marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto err;
wind:
STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid);
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1966,7 +2285,7 @@ err:
int32_t
marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -1978,13 +2297,13 @@ marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
GF_LOG_ERROR),
- "%s occured during setattr of %s",
+ "%s occurred during setattr of %s",
strerror (op_errno),
(local ? local->loc.path : "<nul>"));
}
STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -2001,7 +2320,7 @@ out:
int32_t
marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -2012,7 +2331,7 @@ marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -2022,10 +2341,10 @@ marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto err;
wind:
STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid);
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2033,13 +2352,13 @@ err:
int32_t
marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s occured while "
+ gf_log (this->name, GF_LOG_ERROR, "%s occurred while "
"creating symlinks ", strerror (op_errno));
}
@@ -2047,7 +2366,7 @@ marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -2064,7 +2383,7 @@ out:
int32_t
marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -2075,7 +2394,7 @@ marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -2085,10 +2404,10 @@ marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto err;
wind:
STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2117,10 +2436,19 @@ marker_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1 || local == NULL)
goto out;
+ /* copy the gfid from the stat structure instead of inode,
+ * since if the lookup is fresh lookup, then the inode
+ * would have not yet linked to the inode table which happens
+ * in protocol/server.
+ */
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+
priv = this->private;
if (priv->feature_enabled & GF_QUOTA) {
- quota_xattr_state (this, &local->loc, dict, *buf);
+ mq_xattr_state (this, &local->loc, dict, *buf);
}
out:
@@ -2142,7 +2470,7 @@ marker_lookup (call_frame_t *frame, xlator_t *this,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -2151,7 +2479,7 @@ marker_lookup (call_frame_t *frame, xlator_t *this,
goto err;
if ((priv->feature_enabled & GF_QUOTA) && xattr_req)
- quota_req_xattr (this, loc, xattr_req);
+ mq_req_xattr (this, loc, xattr_req);
wind:
STACK_WIND (frame, marker_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@@ -2162,6 +2490,49 @@ err:
return 0;
}
+int
+marker_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ /* TODO: fill things */
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+
+int
+marker_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ if ((priv->feature_enabled & GF_QUOTA) && dict)
+ mq_req_xattr (this, NULL, dict);
+
+wind:
+ STACK_WIND (frame, marker_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+}
+
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -2262,14 +2633,11 @@ marker_xtime_priv_cleanup (xlator_t *this)
GF_VALIDATE_OR_GOTO (this->name, priv, out);
- if (priv->volume_uuid != NULL)
- GF_FREE (priv->volume_uuid);
+ GF_FREE (priv->volume_uuid);
- if (priv->timestamp_file != NULL)
- GF_FREE (priv->timestamp_file);
+ GF_FREE (priv->timestamp_file);
- if (priv->marker_xattr != NULL)
- GF_FREE (priv->marker_xattr);
+ GF_FREE (priv->marker_xattr);
out:
return;
}
@@ -2297,7 +2665,7 @@ out:
int32_t
reconfigure (xlator_t *this, dict_t *options)
{
- int32_t ret = -1;
+ int32_t ret = 0;
data_t *data = NULL;
gf_boolean_t flag = _gf_false;
marker_conf_t *priv = NULL;
@@ -2338,11 +2706,17 @@ reconfigure (xlator_t *this, dict_t *options)
"xtime updation will fail");
} else {
priv->feature_enabled |= GF_XTIME;
+ data = dict_get (options, "gsync-force-xtime");
+ if (!data)
+ goto out;
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag)
+ priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
}
}
}
out:
- return 0;
+ return ret;
}
@@ -2398,9 +2772,23 @@ init (xlator_t *this)
goto err;
priv->feature_enabled |= GF_XTIME;
+ data = dict_get (options, "gsync-force-xtime");
+ if (!data)
+ goto cont;
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag)
+ priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
}
}
+ cont:
+ this->local_pool = mem_pool_new (marker_local_t, 128);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
return 0;
err:
marker_priv_cleanup (this);
@@ -2422,7 +2810,7 @@ marker_forget (xlator_t *this, inode_t *inode)
goto out;
}
- quota_forget (this, ctx->quota_ctx);
+ mq_forget (this, ctx->quota_ctx);
GF_FREE (ctx);
out:
@@ -2453,7 +2841,11 @@ struct xlator_fops fops = {
.setattr = marker_setattr,
.fsetattr = marker_fsetattr,
.removexattr = marker_removexattr,
- .getxattr = marker_getxattr
+ .getxattr = marker_getxattr,
+ .readdirp = marker_readdirp,
+ .fallocate = marker_fallocate,
+ .discard = marker_discard,
+ .zerofill = marker_zerofill,
};
struct xlator_cbks cbks = {
@@ -2465,5 +2857,6 @@ struct volume_options options[] = {
{.key = {"timestamp-file"}},
{.key = {"quota"}},
{.key = {"xtime"}},
+ {.key = {"gsync-force-xtime"}},
{.key = {NULL}}
};
diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h
index 5502c4ea1..1a58f8cfc 100644
--- a/xlators/features/marker/src/marker.h
+++ b/xlators/features/marker/src/marker.h
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _MARKER_H
#define _MARKER_H
@@ -37,8 +28,9 @@
#define TIMESTAMP_FILE "timestamp-file"
enum {
- GF_QUOTA=1,
- GF_XTIME=2
+ GF_QUOTA = 1,
+ GF_XTIME = 2,
+ GF_XTIME_GSYNC_FORCE = 4,
};
/*initialize the local variable*/
@@ -64,7 +56,7 @@ enum {
} \
} while (0)
-#define MARKER_SET_UID_GID(dest, src) \
+#define _MARKER_SET_UID_GID(dest, src) \
do { \
if (src->uid != -1 && \
src->gid != -1) { \
@@ -73,6 +65,20 @@ enum {
} \
} while (0)
+#define MARKER_SET_UID_GID(frame, dest, src) \
+ do { \
+ _MARKER_SET_UID_GID (dest, src); \
+ frame->root->uid = 0; \
+ frame->root->gid = 0; \
+ frame->cookie = (void *) _GF_UID_GID_CHANGED; \
+ } while (0)
+
+#define MARKER_RESET_UID_GID(frame, dest, src) \
+ do { \
+ _MARKER_SET_UID_GID (dest, src); \
+ frame->cookie = NULL; \
+ } while (0)
+
struct marker_local{
uint32_t timebuf[2];
pid_t pid;
@@ -89,9 +95,28 @@ struct marker_local{
call_stub_t *stub;
int64_t contribution;
struct marker_local *oplocal;
+
+ /* marker quota specific */
+ int64_t delta;
+ int64_t d_off;
+ int64_t sum;
+ int64_t size;
+ int32_t hl_count;
+ int32_t dentry_child_count;
+
+ fd_t *fd;
+ call_frame_t *frame;
+
+ quota_inode_ctx_t *ctx;
+ inode_contribution_t *contri;
+
+ int xflag;
+ dict_t *xdata;
};
typedef struct marker_local marker_local_t;
+#define quota_local_t marker_local_t
+
struct marker_inode_ctx {
struct quota_inode_ctx *quota_ctx;
};
diff --git a/xlators/features/marker/utils/Makefile.am b/xlators/features/marker/utils/Makefile.am
deleted file mode 100644
index 84e926c00..000000000
--- a/xlators/features/marker/utils/Makefile.am
+++ /dev/null
@@ -1,7 +0,0 @@
-SUBDIRS = syncdaemon
-
-gsyncddir = $(libexecdir)/glusterfs
-
-gsyncd_SCRIPTS = gsyncd
-
-CLEANFILES =
diff --git a/xlators/features/marker/utils/gsyncd.in b/xlators/features/marker/utils/gsyncd.in
deleted file mode 100755
index a7af8c0b0..000000000
--- a/xlators/features/marker/utils/gsyncd.in
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/sh
-
-prefix="@prefix@"
-exec_prefix="@exec_prefix@"
-libexecdir=`eval echo "@libexecdir@"`
-sbindir=`eval echo "@sbindir@"`
-
-gluster="$sbindir"/gluster
-
-# glusterd service autodetection
-
-config_wanted=1
-if [ "$_GLUSTERD_CALLED_" = 1 ]; then
- # OK, we know glusterd called us, no need to look for further config
- config_wanted=0
- # ... altough this conclusion should not inherit to our children
- unset _GLUSTERD_CALLED_
-else
- # look for a -c option -- if present, we are already configured.
-
- for a in "$@"; do
- # -c found, see if it has an argument
- if [ "$one_more_arg" = 1 ]; then
- if echo "$a" | grep -qv ^-; then
- config_wanted=0
- break
- fi
- one_more_arg=0
- fi
-
- if [ "$a" = -c ] || [ "$a" = --config-file ]; then
- one_more_arg=1
- continue
- fi
-
- if echo $a | grep -qE '^(-c.|--config-file=)'; then
- config_wanted=0;
- break
- fi
- done
-
-fi
-
-if [ $config_wanted = 1 ]; then
- wd="`${gluster} system:: getwd`"
- if [ $? -eq 0 ]; then
- config_file="$wd/geo-replication/gsyncd.conf"
- fi
-fi
-
-if [ -z "$config_file" ]; then
- exec @PYTHON@ "$libexecdir"/glusterfs/python/syncdaemon/gsyncd.py "$@"
-else
- exec @PYTHON@ "$libexecdir"/glusterfs/python/syncdaemon/gsyncd.py -c "$config_file" "$@"
-fi
diff --git a/xlators/features/marker/utils/syncdaemon/Makefile.am b/xlators/features/marker/utils/syncdaemon/Makefile.am
deleted file mode 100644
index ef2dc9aea..000000000
--- a/xlators/features/marker/utils/syncdaemon/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon
-
-syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py resource.py configinterface.py syncdutils.py monitor.py libcxattr.py
-
-CLEANFILES =
diff --git a/xlators/features/marker/utils/syncdaemon/README.md b/xlators/features/marker/utils/syncdaemon/README.md
deleted file mode 100644
index d45006932..000000000
--- a/xlators/features/marker/utils/syncdaemon/README.md
+++ /dev/null
@@ -1,81 +0,0 @@
-gsycnd, the Gluster Syncdaemon
-==============================
-
-REQUIREMENTS
-------------
-
-_gsyncd_ is a program which can operate either in _master_ or in _slave_ mode.
-Requirements are categorized according to this.
-
-* supported OS is GNU/Linux
-* Python >= 2.5, or 2.4 with Ctypes (see below) (both)
-* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave)
-* rsync (both)
-* glusterfs with marker support (master); glusterfs (optional on slave)
-* FUSE; for supported versions consult glusterfs
-
-INSTALLATION
-------------
-
-As of now, the supported way of operation is running from the source directory.
-
-If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/).
-
-CONFIGURATION
--------------
-
-gsyncd tunables are a subset of the long command-line options; for listing them,
-type
-
- gsyncd.py --help
-
-and see the long options up to "--config-file". (The leading double dash should be omitted;
-interim underscores and dashes are interchangeable.) The set of options bear some resemblance
-to those of glusterfs and rsync.
-
-The config file format matches the following syntax:
-
- <option1>: <value1>
- <option2>: <value2>
- # comment
-
-By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd.conf_
-in the source tree.
-
-USAGE
------
-
-gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally.
-Assume we have a gluster volume _pop_ at localhost. We try to set up the following mirrors
-for it with gysncd:
-
-1. _/data/mirror_
-2. local gluster volume _yow_
-3. _/data/far_mirror_ at example.com
-4. gluster volume _moz_ at example.com
-
-The respective gsyncd invocations are (demoing some syntax sugaring):
-
-1.
-
- gsyncd.py gluster://localhost:pop file:///data/mirror
-
- or short form
-
- gsyncd.py :pop /data/mirror
-
-2. `gsyncd :pop :yow`
-3.
-
- gsyncd.py :pop ssh://example.com:/data/far_mirror
-
- or short form
-
- gsyncd.py :pop example.com:/data/far_mirror
-
-4. `gsyncd.py :pop example.com::moz`
-
-gsyncd has to be available on both sides; it's location on the remote side has to be specified
-via the "--remote-gsyncd" option (or "remote-gsyncd" config file parameter). (This option can also be
-used for setting options on the remote side, although the suggested mode of operation is to
-set parameters like log file / pid file in the configuration file.)
diff --git a/xlators/features/marker/utils/syncdaemon/__init__.py b/xlators/features/marker/utils/syncdaemon/__init__.py
deleted file mode 100644
index e69de29bb..000000000
--- a/xlators/features/marker/utils/syncdaemon/__init__.py
+++ /dev/null
diff --git a/xlators/features/marker/utils/syncdaemon/configinterface.py b/xlators/features/marker/utils/syncdaemon/configinterface.py
deleted file mode 100644
index cc8f7063a..000000000
--- a/xlators/features/marker/utils/syncdaemon/configinterface.py
+++ /dev/null
@@ -1,185 +0,0 @@
-try:
- import ConfigParser
-except ImportError:
- # py 3
- import configparser as ConfigParser
-import re
-from string import Template
-
-from syncdutils import escape, unescape, norm, update_file, GsyncdError
-
-SECT_ORD = '__section_order__'
-SECT_META = '__meta__'
-config_version = 2.0
-
-re_type = type(re.compile(''))
-
-
-class MultiDict(object):
-
- def __init__(self, *dd):
- self.dicts = dd
-
- def __getitem__(self, key):
- val = None
- for d in self.dicts:
- if d.get(key):
- val = d[key]
- if not val:
- raise KeyError(key)
- return val
-
-
-class GConffile(object):
-
- def _normconfig(self):
- for n, s in self.config._sections.items():
- if n.find('__') == 0:
- continue
- s2 = type(s)()
- for k, v in s.items():
- if k.find('__') != 0:
- k = norm(k)
- s2[k] = v
- self.config._sections[n] = s2
-
- def __init__(self, path, peers, *dd):
- self.peers = peers
- self.path = path
- self.auxdicts = dd
- self.config = ConfigParser.RawConfigParser()
- self.config.read(path)
- self._normconfig()
-
- def section(self, rx=False):
- peers = self.peers
- if not peers:
- peers = ['.', '.']
- rx = True
- if rx:
- st = 'peersrx'
- else:
- st = 'peers'
- return ' '.join([st] + [escape(u) for u in peers])
-
- @staticmethod
- def parse_section(section):
- sl = section.split()
- st = sl.pop(0)
- sl = [unescape(u) for u in sl]
- if st == 'peersrx':
- sl = [re.compile(u) for u in sl]
- return sl
-
- def ord_sections(self):
- """Return an ordered list of sections.
-
- Ordering happens based on the auxiliary
- SECT_ORD section storing indices for each
- section added through the config API.
-
- To not to go corrupt in case of manually
- written config files, we take care to append
- also those sections which are not registered
- in SECT_ORD.
-
- Needed for python 2.{4,5} where ConfigParser
- cannot yet order sections/options internally.
- """
- so = {}
- if self.config.has_section(SECT_ORD):
- so = self.config._sections[SECT_ORD]
- so2 = {}
- for k, v in so.items():
- if k != '__name__':
- so2[k] = int(v)
- tv = 0
- if so2:
- tv = max(so2.values()) + 1
- ss = [s for s in self.config.sections() if s.find('__') != 0]
- for s in ss:
- if s in so.keys():
- continue
- so2[s] = tv
- tv += 1
- def scmp(x, y):
- return cmp(*(so2[s] for s in (x, y)))
- ss.sort(scmp)
- return ss
-
- def update_to(self, dct, allow_unresolved=False):
- if not self.peers:
- raise GsyncdError('no peers given, cannot select matching options')
- def update_from_sect(sect, mud):
- for k, v in self.config._sections[sect].items():
- if k == '__name__':
- continue
- if allow_unresolved:
- dct[k] = Template(v).safe_substitute(mud)
- else:
- dct[k] = Template(v).substitute(mud)
- for sect in self.ord_sections():
- sp = self.parse_section(sect)
- if isinstance(sp[0], re_type) and len(sp) == len(self.peers):
- match = True
- mad = {}
- for i in range(len(sp)):
- m = sp[i].search(self.peers[i])
- if not m:
- match = False
- break
- for j in range(len(m.groups())):
- mad['match%d_%d' % (i+1, j+1)] = m.groups()[j]
- if match:
- update_from_sect(sect, MultiDict(dct, mad, *self.auxdicts))
- if self.config.has_section(self.section()):
- update_from_sect(self.section(), MultiDict(dct, *self.auxdicts))
-
- def get(self, opt=None):
- d = {}
- self.update_to(d, allow_unresolved = True)
- if opt:
- opt = norm(opt)
- v = d.get(opt)
- if v:
- print v
- else:
- for k, v in d.iteritems():
- if k == '__name__':
- continue
- print("%s: %s" % (k, v))
-
- def write(self, trfn, opt, *a, **kw):
- def mergeconf(f):
- self.config = ConfigParser.RawConfigParser()
- self.config.readfp(f)
- self._normconfig()
- if not self.config.has_section(SECT_META):
- self.config.add_section(SECT_META)
- self.config.set(SECT_META, 'version', config_version)
- return trfn(norm(opt), *a, **kw)
- def updateconf(f):
- self.config.write(f)
- update_file(self.path, updateconf, mergeconf)
-
- def _set(self, opt, val, rx=False):
- sect = self.section(rx)
- if not self.config.has_section(sect):
- self.config.add_section(sect)
- # regarding SECT_ORD, cf. ord_sections
- if not self.config.has_section(SECT_ORD):
- self.config.add_section(SECT_ORD)
- self.config.set(SECT_ORD, sect, len(self.config._sections[SECT_ORD]))
- self.config.set(sect, opt, val)
- return True
-
- def set(self, opt, *a, **kw):
- self.write(self._set, opt, *a, **kw)
-
- def _delete(self, opt, rx=False):
- sect = self.section(rx)
- if self.config.has_section(sect):
- return self.config.remove_option(sect, opt)
-
- def delete(self, opt, *a, **kw):
- self.write(self._delete, opt, *a, **kw)
diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py
deleted file mode 100644
index ddbac21e4..000000000
--- a/xlators/features/marker/utils/syncdaemon/gconf.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import os
-
-class GConf(object):
- ssh_ctl_dir = None
- ssh_ctl_args = None
- cpid = None
- pid_file_owned = False
- log_exit = False
- permanent_handles = []
-
- @classmethod
- def setup_ssh_ctl(cls, ctld):
- cls.ssh_ctl_dir = ctld
- cls.ssh_ctl_args = ["-oControlMaster=auto", "-S", os.path.join(ctld, "gsycnd-ssh-%r@%h:%p")]
-
-gconf = GConf()
diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py
deleted file mode 100644
index fb1dc1b9c..000000000
--- a/xlators/features/marker/utils/syncdaemon/gsyncd.py
+++ /dev/null
@@ -1,305 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import os.path
-import sys
-import time
-import logging
-import signal
-import select
-import optparse
-import fcntl
-from optparse import OptionParser, SUPPRESS_HELP
-from logging import Logger
-from errno import EEXIST, ENOENT
-
-from gconf import gconf
-from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception
-from syncdutils import GsyncdError
-from configinterface import GConffile
-import resource
-from monitor import monitor
-
-class GLogger(Logger):
-
- def makeRecord(self, name, level, *a):
- rv = Logger.makeRecord(self, name, level, *a)
- rv.nsecs = (rv.created - int(rv.created)) * 1000000
- fr = sys._getframe(4)
- callee = fr.f_locals.get('self')
- if callee:
- ctx = str(type(callee)).split("'")[1].split('.')[-1]
- else:
- ctx = '<top>'
- if not hasattr(rv, 'funcName'):
- rv.funcName = fr.f_code.co_name
- rv.lvlnam = logging.getLevelName(level)[0]
- rv.ctx = ctx
- return rv
-
- @classmethod
- def setup(cls, **kw):
- lbl = kw.get('label', "")
- if lbl:
- lbl = '(' + lbl + ')'
- lprm = {'datefmt': "%Y-%m-%d %H:%M:%S",
- 'format': "[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s" + lbl + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s"}
- lprm.update(kw)
- lvl = kw.get('level', logging.INFO)
- lprm['level'] = lvl
- logging.root = cls("root", lvl)
- logging.setLoggerClass(cls)
- logging.getLogger().handlers = []
- logging.basicConfig(**lprm)
-
-
-def startup(**kw):
- if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn':
- if not grabpidfile():
- sys.stderr.write("pidfile is taken, exiting.\n")
- sys.exit(2)
- gconf.pid_file_owned = True
-
- if kw.get('go_daemon') == 'should':
- x, y = os.pipe()
- gconf.cpid = os.fork()
- if gconf.cpid:
- os.close(x)
- sys.exit()
- os.close(y)
- os.setsid()
- dn = os.open(os.devnull, os.O_RDWR)
- for f in (sys.stdin, sys.stdout, sys.stderr):
- os.dup2(dn, f.fileno())
- if getattr(gconf, 'pid_file', None):
- if not grabpidfile(gconf.pid_file + '.tmp'):
- raise GsyncdError("cannot grab temporary pidfile")
- os.rename(gconf.pid_file + '.tmp', gconf.pid_file)
- # wait for parent to terminate
- # so we can start up with
- # no messing from the dirty
- # ol' bustard
- select.select((x,), (), ())
- os.close(x)
-
- lkw = {}
- if gconf.log_level:
- lkw['level'] = gconf.log_level
- if kw.get('log_file'):
- if kw['log_file'] in ('-', '/dev/stderr'):
- lkw['stream'] = sys.stderr
- elif kw['log_file'] == '/dev/stdout':
- lkw['stream'] = sys.stdout
- else:
- lkw['filename'] = kw['log_file']
- GLogger.setup(label=kw.get('label'), **lkw)
- gconf.log_exit = True
-
-def main():
- signal.signal(signal.SIGTERM, lambda *a: finalize(*a, **{'exval': 1}))
- GLogger.setup()
- excont = FreeObject(exval = 0)
- try:
- try:
- main_i()
- except:
- log_raise_exception(excont)
- finally:
- finalize(exval = excont.exval)
-
-def main_i():
- rconf = {'go_daemon': 'should'}
-
- def store_abs(opt, optstr, val, parser):
- if val and val != '-':
- val = os.path.abspath(val)
- setattr(parser.values, opt.dest, val)
- def store_local(opt, optstr, val, parser):
- rconf[opt.dest] = val
- def store_local_curry(val):
- return lambda o, oo, vx, p: store_local(o, oo, val, p)
- def store_local_obj(op, dmake):
- return lambda o, oo, vx, p: store_local(o, oo, FreeObject(op=op, **dmake(vx)), p)
-
- op = OptionParser(usage="%prog [options...] <master> <slave>", version="%prog 0.0.1")
- op.add_option('--gluster-command', metavar='CMD', default='glusterfs')
- op.add_option('--gluster-log-file', metavar='LOGF', default=os.devnull, type=str, action='callback', callback=store_abs)
- op.add_option('--gluster-log-level', metavar='LVL')
- op.add_option('-p', '--pid-file', metavar='PIDF', type=str, action='callback', callback=store_abs)
- op.add_option('-l', '--log-file', metavar='LOGF', type=str, action='callback', callback=store_abs)
- op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs)
- op.add_option('-L', '--log-level', metavar='LVL')
- op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0]))
- op.add_option('--volume-id', metavar='UUID')
- op.add_option('--session-owner', metavar='ID')
- op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh')
- op.add_option('--rsync-command', metavar='CMD', default='rsync')
- op.add_option('--rsync-extra', metavar='ARGS', default='-sS', help=SUPPRESS_HELP)
- op.add_option('--timeout', metavar='SEC', type=int, default=120)
- op.add_option('--sync-jobs', metavar='N', type=int, default=3)
- op.add_option('--turns', metavar='N', type=int, default=0, help=SUPPRESS_HELP)
-
- op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local)
- # duh. need to specify dest or value will be mapped to None :S
- op.add_option('--monitor', dest='monitor', action='callback', callback=store_local_curry(True))
- op.add_option('--feedback-fd', dest='feedback_fd', type=int, help=SUPPRESS_HELP, action='callback', callback=store_local)
- op.add_option('--listen', dest='listen', help=SUPPRESS_HELP, action='callback', callback=store_local_curry(True))
- op.add_option('-N', '--no-daemon', dest="go_daemon", action='callback', callback=store_local_curry('dont'))
- op.add_option('--debug', dest="go_daemon", action='callback', callback=lambda *a: (store_local_curry('dont')(*a),
- setattr(a[-1].values, 'log_file', '-'),
- setattr(a[-1].values, 'log_level', 'DEBUG'))),
-
- for a in ('check', 'get'):
- op.add_option('--config-' + a, metavar='OPT', type=str, dest='config', action='callback',
- callback=store_local_obj(a, lambda vx: {'opt': vx}))
- op.add_option('--config-get-all', dest='config', action='callback', callback=store_local_obj('get', lambda vx: {'opt': None}))
- for m in ('', '-rx'):
- # call this code 'Pythonic' eh?
- # have to define a one-shot local function to be able to inject (a value depending on the)
- # iteration variable into the inner lambda
- def conf_mod_opt_regex_variant(rx):
- op.add_option('--config-set' + m, metavar='OPT VAL', type=str, nargs=2, dest='config', action='callback',
- callback=store_local_obj('set', lambda vx: {'opt': vx[0], 'val': vx[1], 'rx': rx}))
- op.add_option('--config-del' + m, metavar='OPT', type=str, dest='config', action='callback',
- callback=store_local_obj('del', lambda vx: {'opt': vx, 'rx': rx}))
- conf_mod_opt_regex_variant(not not m)
-
- op.add_option('--normalize-url', dest='url_print', action='callback', callback=store_local_curry('normal'))
- op.add_option('--canonicalize-url', dest='url_print', action='callback', callback=store_local_curry('canon'))
- op.add_option('--canonicalize-escape-url', dest='url_print', action='callback', callback=store_local_curry('canon_esc'))
-
- tunables = [ norm(o.get_opt_string()[2:]) for o in op.option_list if o.callback in (store_abs, None) and o.get_opt_string() not in ('--version', '--help') ]
-
- # precedence for sources of values: 1) commandline, 2) cfg file, 3) defaults
- # -- for this to work out we need to tell apart defaults from explicitly set
- # options... so churn out the defaults here and call the parser with virgin
- # values container.
- defaults = op.get_default_values()
- opts, args = op.parse_args(values=optparse.Values())
- confdata = rconf.get('config')
- if not (len(args) == 2 or \
- (len(args) == 1 and rconf.get('listen')) or \
- (len(args) <= 2 and confdata) or \
- rconf.get('url_print')):
- sys.stderr.write("error: incorrect number of arguments\n\n")
- sys.stderr.write(op.get_usage() + "\n")
- sys.exit(1)
-
- if getattr(confdata, 'rx', None):
- # peers are regexen, don't try to parse them
- canon_peers = args
- namedict = {}
- else:
- rscs = [resource.parse_url(u) for u in args]
- dc = rconf.get('url_print')
- if dc:
- for r in rscs:
- print(r.get_url(**{'normal': {},
- 'canon': {'canonical': True},
- 'canon_esc': {'canonical': True, 'escaped': True}}[dc]))
- return
- local = remote = None
- if rscs:
- local = rscs[0]
- if len(rscs) > 1:
- remote = rscs[1]
- if not local.can_connect_to(remote):
- raise GsyncdError("%s cannot work with %s" % (local.path, remote and remote.path))
- pa = ([], [], [])
- urlprms = ({}, {'canonical': True}, {'canonical': True, 'escaped': True})
- for x in rscs:
- for i in range(len(pa)):
- pa[i].append(x.get_url(**urlprms[i]))
- peers, canon_peers, canon_esc_peers = pa
- # creating the namedict, a dict representing various ways of referring to / repreenting
- # peers to be fillable in config templates
- mods = (lambda x: x, lambda x: x[0].upper() + x[1:], lambda x: 'e' + x[0].upper() + x[1:])
- if remote:
- rmap = { local: ('local', 'master'), remote: ('remote', 'slave') }
- else:
- rmap = { local: ('local', 'slave') }
- namedict = {}
- for i in range(len(rscs)):
- x = rscs[i]
- for name in rmap[x]:
- for j in range(3):
- namedict[mods[j](name)] = pa[j][i]
- if x.scheme == 'gluster':
- namedict[name + 'vol'] = x.volume
- if not 'config_file' in rconf:
- rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd.conf")
- gcnf = GConffile(rconf['config_file'], canon_peers, defaults.__dict__, opts.__dict__, namedict)
-
- if confdata:
- opt_ok = norm(confdata.opt) in tunables + [None]
- if confdata.op == 'check':
- if opt_ok:
- sys.exit(0)
- else:
- sys.exit(1)
- elif not opt_ok:
- raise GsyncdError("not a valid option: " + confdata.opt)
- if confdata.op == 'get':
- gcnf.get(confdata.opt)
- elif confdata.op == 'set':
- gcnf.set(confdata.opt, confdata.val, confdata.rx)
- elif confdata.op == 'del':
- gcnf.delete(confdata.opt, confdata.rx)
- return
-
- gconf.__dict__.update(defaults.__dict__)
- gcnf.update_to(gconf.__dict__)
- gconf.__dict__.update(opts.__dict__)
- gconf.configinterface = gcnf
-
- ffd = rconf.get('feedback_fd')
- if ffd:
- fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
-
- #normalize loglevel
- lvl0 = gconf.log_level
- if isinstance(lvl0, str):
- lvl1 = lvl0.upper()
- lvl2 = logging.getLevelName(lvl1)
- # I have _never_ _ever_ seen such an utterly braindead
- # error condition
- if lvl2 == "Level " + lvl1:
- raise GsyncdError('cannot recognize log level "%s"' % lvl0)
- gconf.log_level = lvl2
-
- go_daemon = rconf['go_daemon']
- be_monitor = rconf.get('monitor')
-
- if not be_monitor and isinstance(remote, resource.SSH) and \
- go_daemon == 'should':
- go_daemon = 'postconn'
- log_file = None
- else:
- log_file = gconf.log_file
- if be_monitor:
- label = 'monitor'
- elif remote:
- #master
- label = ''
- else:
- label = 'slave'
- startup(go_daemon=go_daemon, log_file=log_file, label=label)
-
- if be_monitor:
- return monitor()
-
- logging.info("syncing: %s" % " -> ".join(peers))
- if remote:
- go_daemon = remote.connect_remote(go_daemon=go_daemon)
- if go_daemon:
- startup(go_daemon=go_daemon, log_file=gconf.log_file)
- # complete remote connection in child
- remote.connect_remote(go_daemon='done')
- local.connect()
- if ffd:
- os.close(ffd)
- local.service_loop(*[r for r in [remote] if r])
-
-
-if __name__ == "__main__":
- main()
diff --git a/xlators/features/marker/utils/syncdaemon/libcxattr.py b/xlators/features/marker/utils/syncdaemon/libcxattr.py
deleted file mode 100644
index fdc016c47..000000000
--- a/xlators/features/marker/utils/syncdaemon/libcxattr.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import os
-from ctypes import *
-from ctypes.util import find_library
-
-class Xattr(object):
-
- libc = CDLL(find_library("libc"))
-
- @classmethod
- def geterrno(cls):
- return c_int.in_dll(cls.libc, 'errno').value
-
- @classmethod
- def raise_oserr(cls):
- errn = cls.geterrno()
- raise OSError(errn, os.strerror(errn))
-
- @classmethod
- def _query_xattr(cls, path, siz, syscall, *a):
- if siz:
- buf = create_string_buffer('\0' * siz)
- else:
- buf = None
- ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
- if ret == -1:
- cls.raise_oserr()
- if siz:
- return buf.raw[:ret]
- else:
- return ret
-
- @classmethod
- def lgetxattr(cls, path, attr, siz=0):
- return cls._query_xattr( path, siz, 'lgetxattr', attr)
-
- @classmethod
- def llistxattr(cls, path, siz=0):
- ret = cls._query_xattr(path, siz, 'llistxattr')
- if isinstance(ret, str):
- ret = ret.split('\0')
- return ret
-
- @classmethod
- def lsetxattr(cls, path, attr, val):
- ret = cls.libc.lsetxattr(path, attr, val, len(val), 0)
- if ret == -1:
- cls.raise_oserr()
-
- @classmethod
- def lremovexattr(cls, path, attr):
- ret = cls.libc.lremovexattr(path, attr)
- if ret == -1:
- cls.raise_oserr()
-
- @classmethod
- def llistxattr_buf(cls, path):
- size = cls.llistxattr(path)
- if size == -1:
- cls.raise_oserr()
- if size == 0:
- return []
- return cls.llistxattr(path, size)
diff --git a/xlators/features/marker/utils/syncdaemon/master.py b/xlators/features/marker/utils/syncdaemon/master.py
deleted file mode 100644
index 495634b06..000000000
--- a/xlators/features/marker/utils/syncdaemon/master.py
+++ /dev/null
@@ -1,392 +0,0 @@
-import os
-import sys
-import time
-import stat
-import signal
-import logging
-import errno
-from errno import ENOENT, ENODATA
-from threading import currentThread, Condition, Lock
-
-from gconf import gconf
-from syncdutils import FreeObject, Thread, GsyncdError
-
-URXTIME = (-1, 0)
-
-class GMaster(object):
-
- KFGN = 0
- KNAT = 1
-
- def get_sys_volinfo(self):
- fgn_vis, nat_vi = self.master.server.foreign_volume_infos(), \
- self.master.server.native_volume_info()
- fgn_vi = None
- if fgn_vis:
- if len(fgn_vis) > 1:
- raise GsyncdError("cannot work with multiple foreign masters")
- fgn_vi = fgn_vis[0]
- return fgn_vi, nat_vi
-
- @property
- def uuid(self):
- if self.volinfo:
- return self.volinfo['uuid']
-
- @property
- def volmark(self):
- if self.volinfo:
- return self.volinfo['volume_mark']
-
- @property
- def inter_master(self):
- return self.volinfo_state[self.KFGN] and True or False
-
- def xtime(self, path, *a, **opts):
- if a:
- rsc = a[0]
- else:
- rsc = self.master
- if not 'create' in opts:
- opts['create'] = (rsc == self.master and not self.inter_master)
- if not 'default_xtime' in opts:
- if rsc == self.master and self.inter_master:
- opts['default_xtime'] = ENODATA
- else:
- opts['default_xtime'] = URXTIME
- xt = rsc.server.xtime(path, self.uuid)
- if isinstance(xt, int) and xt != ENODATA:
- return xt
- invalid_xtime = (xt == ENODATA or xt < self.volmark)
- if invalid_xtime and opts['create']:
- t = time.time()
- sec = int(t)
- nsec = int((t - sec) * 1000000)
- xt = (sec, nsec)
- rsc.server.set_xtime(path, self.uuid, xt)
- if invalid_xtime:
- xt = opts['default_xtime']
- return xt
-
- def __init__(self, master, slave):
- self.master = master
- self.slave = slave
- self.jobtab = {}
- self.syncer = Syncer(slave)
- # crawls vs. turns:
- # - self.crawls is simply the number of crawl() invocations on root
- # - one turn is a maximal consecutive sequence of crawls so that each
- # crawl in it detects a change to be synced
- # - self.turns is the number of turns since start
- # - self.total_turns is a limit so that if self.turns reaches it, then
- # we exit (for diagnostic purposes)
- # so, eg., if the master fs changes unceasingly, self.turns will remain 0.
- self.crawls = 0
- self.turns = 0
- self.total_turns = int(gconf.turns)
- self.lastreport = {'crawls': 0, 'turns': 0}
- self.start = None
- self.change_seen = None
- # the authorative (foreign, native) volinfo pair
- # which lets us deduce what to do when we refetch
- # the volinfos from system
- uuid_preset = getattr(gconf, 'volume_id', None)
- self.volinfo_state = (uuid_preset and {'uuid': uuid_preset}, None)
- # the actual volinfo we make use of
- self.volinfo = None
- self.terminate = False
-
- def crawl_loop(self):
- timo = int(gconf.timeout or 0)
- if timo > 0:
- def keep_alive():
- while True:
- gap = timo * 0.5
- # first grab a reference as self.volinfo
- # can be changed in main thread
- vi = self.volinfo
- if vi:
- # then have a private copy which we can mod
- vi = vi.copy()
- vi['timeout'] = int(time.time()) + timo
- else:
- # send keep-alives more frequently to
- # avoid a delay in announcing our volume info
- # to slave if it becomes established in the
- # meantime
- gap = min(10, gap)
- self.slave.server.keep_alive(vi)
- time.sleep(gap)
- t = Thread(target=keep_alive)
- t.start()
- self.lastreport['time'] = time.time()
- while not self.terminate:
- self.crawl()
-
- def add_job(self, path, label, job, *a, **kw):
- if self.jobtab.get(path) == None:
- self.jobtab[path] = []
- self.jobtab[path].append((label, a, lambda : job(*a, **kw)))
-
- def add_failjob(self, path, label):
- logging.debug('salvaged: ' + label)
- self.add_job(path, label, lambda: False)
-
- def wait(self, path, *args):
- jobs = self.jobtab.pop(path, [])
- succeed = True
- for j in jobs:
- ret = j[-1]()
- if not ret:
- succeed = False
- if succeed:
- self.sendmark(path, *args)
- return succeed
-
- def sendmark(self, path, mark, adct=None):
- if adct:
- self.slave.server.setattr(path, adct)
- self.slave.server.set_xtime(path, self.uuid, mark)
-
- @staticmethod
- def volinfo_state_machine(volinfo_state, volinfo_sys):
- # store the value below "boxed" to emulate proper closures
- # (variables of the enclosing scope are available inner functions
- # provided they are no reassigned; mutation is OK).
- param = FreeObject(relax_mismatch = False, state_change = None, index=-1)
- def select_vi(vi0, vi):
- param.index += 1
- if vi and (not vi0 or vi0['uuid'] == vi['uuid']):
- if not vi0 and not param.relax_mismatch:
- param.state_change = param.index
- # valid new value found; for the rest, we are graceful about
- # uuid mismatch
- param.relax_mismatch = True
- return vi
- if vi0 and vi and vi0['uuid'] != vi['uuid'] and not param.relax_mismatch:
- # uuid mismatch for master candidate, bail out
- raise GsyncdError("aborting on uuid change from %s to %s" % \
- (vi0['uuid'], vi['uuid']))
- # fall back to old
- return vi0
- newstate = tuple(select_vi(*vip) for vip in zip(volinfo_state, volinfo_sys))
- srep = lambda vi: vi and vi['uuid'][0:8]
- logging.debug('(%s, %s) << (%s, %s) -> (%s, %s)' % \
- tuple(srep(vi) for vi in volinfo_state + volinfo_sys + newstate))
- return newstate, param.state_change
-
- def crawl(self, path='.', xtl=None):
- if path == '.':
- if self.start:
- self.crawls += 1
- logging.debug("... crawl #%d done, took %.6f seconds" % \
- (self.crawls, time.time() - self.start))
- time.sleep(1)
- self.start = time.time()
- should_display_info = self.start - self.lastreport['time'] >= 60
- if should_display_info:
- logging.info("completed %d crawls, %d turns",
- self.crawls - self.lastreport['crawls'],
- self.turns - self.lastreport['turns'])
- self.lastreport.update(crawls = self.crawls,
- turns = self.turns,
- time = self.start)
- volinfo_sys = self.get_sys_volinfo()
- self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state,
- volinfo_sys)
- if self.inter_master:
- self.volinfo = volinfo_sys[self.KFGN]
- else:
- self.volinfo = volinfo_sys[self.KNAT]
- if state_change == self.KFGN or (state_change == self.KNAT and not self.inter_master):
- logging.info('new master is %s', self.uuid)
- if self.volinfo:
- logging.info("%s master with volume id %s ..." % \
- (self.inter_master and "intermediate" or "primary",
- self.uuid))
- if state_change == self.KFGN:
- gconf.configinterface.set('volume_id', self.uuid)
- if self.volinfo:
- if self.volinfo['retval']:
- raise GsyncdError ("master is corrupt")
- else:
- if should_display_info or self.crawls == 0:
- if self.inter_master:
- logging.info("waiting for being synced from %s ..." % \
- self.volinfo_state[self.KFGN]['uuid'])
- else:
- logging.info("waiting for volume info ...")
- return
- logging.debug("entering " + path)
- if not xtl:
- xtl = self.xtime(path)
- if isinstance(xtl, int):
- self.add_failjob(path, 'no-local-node')
- return
- xtr0 = self.xtime(path, self.slave)
- if isinstance(xtr0, int):
- if xtr0 != ENOENT:
- self.slave.server.purge(path)
- try:
- self.slave.server.mkdir(path)
- except OSError:
- self.add_failjob(path, 'no-remote-node')
- return
- xtr = URXTIME
- else:
- xtr = xtr0
- if xtr > xtl:
- raise GsyncdError("timestamp corruption for " + path)
- if xtl == xtr:
- if path == '.' and self.change_seen:
- self.turns += 1
- self.change_seen = False
- if self.total_turns:
- logging.info("finished turn #%s/%s" % \
- (self.turns, self.total_turns))
- if self.turns == self.total_turns:
- logging.info("reached turn limit")
- self.terminate = True
- return
- if path == '.':
- self.change_seen = True
- try:
- dem = self.master.server.entries(path)
- except OSError:
- self.add_failjob(path, 'local-entries-fail')
- return
- try:
- des = self.slave.server.entries(path)
- except OSError:
- self.slave.server.purge(path)
- try:
- self.slave.server.mkdir(path)
- des = self.slave.server.entries(path)
- except OSError:
- self.add_failjob(path, 'remote-entries-fail')
- return
- dd = set(des) - set(dem)
- if dd:
- self.slave.server.purge(path, dd)
- chld = []
- for e in dem:
- e = os.path.join(path, e)
- xte = self.xtime(e)
- if isinstance(xte, int):
- logging.warn("irregular xtime for %s: %s" % (e, errno.errorcode[xte]))
- elif xte > xtr:
- chld.append((e, xte))
- def indulgently(e, fnc, blame=None):
- if not blame:
- blame = path
- try:
- return fnc(e)
- except (IOError, OSError):
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- logging.warn("salvaged ENOENT for" + e)
- self.add_failjob(blame, 'by-indulgently')
- return False
- else:
- raise
- for e, xte in chld:
- st = indulgently(e, lambda e: os.lstat(e))
- if st == False:
- continue
- mo = st.st_mode
- adct = {'own': (st.st_uid, st.st_gid)}
- if stat.S_ISLNK(mo):
- if indulgently(e, lambda e: self.slave.server.symlink(os.readlink(e), e)) == False:
- continue
- self.sendmark(e, xte, adct)
- elif stat.S_ISREG(mo):
- logging.debug("syncing %s ..." % e)
- pb = self.syncer.add(e)
- def regjob(e, xte, pb):
- if pb.wait():
- logging.debug("synced " + e)
- self.sendmark(e, xte)
- return True
- else:
- logging.error("failed to sync " + e)
- self.add_job(path, 'reg', regjob, e, xte, pb)
- elif stat.S_ISDIR(mo):
- adct['mode'] = mo
- if indulgently(e, lambda e: (self.add_job(path, 'cwait', self.wait, e, xte, adct),
- self.crawl(e, xte),
- True)[-1], blame=e) == False:
- continue
- else:
- # ignore fifos, sockets and special files
- pass
- if path == '.':
- self.wait(path, xtl)
-
-class BoxClosedErr(Exception):
- pass
-
-class PostBox(list):
-
- def __init__(self, *a):
- list.__init__(self, *a)
- self.lever = Condition()
- self.open = True
- self.done = False
-
- def wait(self):
- self.lever.acquire()
- if not self.done:
- self.lever.wait()
- self.lever.release()
- return self.result
-
- def wakeup(self, data):
- self.result = data
- self.lever.acquire()
- self.done = True
- self.lever.notifyAll()
- self.lever.release()
-
- def append(self, e):
- self.lever.acquire()
- if not self.open:
- raise BoxClosedErr
- list.append(self, e)
- self.lever.release()
-
- def close(self):
- self.lever.acquire()
- self.open = False
- self.lever.release()
-
-class Syncer(object):
-
- def __init__(self, slave):
- self.slave = slave
- self.lock = Lock()
- self.pb = PostBox()
- for i in range(int(gconf.sync_jobs)):
- t = Thread(target=self.syncjob)
- t.start()
-
- def syncjob(self):
- while True:
- pb = None
- while True:
- self.lock.acquire()
- if self.pb:
- pb, self.pb = self.pb, PostBox()
- self.lock.release()
- if pb:
- break
- time.sleep(0.5)
- pb.close()
- pb.wakeup(self.slave.rsync(pb))
-
- def add(self, e):
- while True:
- try:
- self.pb.append(e)
- return self.pb
- except BoxClosedErr:
- pass
diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py
deleted file mode 100644
index 365e91435..000000000
--- a/xlators/features/marker/utils/syncdaemon/monitor.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import os
-import sys
-import time
-import logging
-import select
-from signal import SIGKILL
-from gconf import gconf
-from syncdutils import update_file
-
-class Monitor(object):
-
- def __init__(self):
- self.state = None
-
- def set_state(self, state):
- if state == self.state:
- return
- self.state = state
- logging.info('new state: %s' % state)
- if getattr(gconf, 'state_file', None):
- update_file(gconf.state_file, lambda f: f.write(state + '\n'))
-
- def monitor(self):
- argv = sys.argv[:]
- for o in ('-N', '--no-daemon', '--monitor'):
- while o in argv:
- argv.remove(o)
- argv.extend(('-N', '-p', ''))
- argv.insert(0, os.path.basename(sys.executable))
-
- self.set_state('starting...')
- ret = 0
- def nwait(p, o=0):
- p2, r = os.waitpid(p, o)
- if not p2:
- return
- if os.WIFEXITED(r):
- return os.WEXITSTATUS(r)
- return 1
- conn_timeout = 60
- while ret in (0, 1):
- logging.info('-' * conn_timeout)
- logging.info('starting gsyncd worker')
- pr, pw = os.pipe()
- cpid = os.fork()
- if cpid == 0:
- os.close(pr)
- os.execv(sys.executable, argv + ['--feedback-fd', str(pw)])
- os.close(pw)
- t0 = time.time()
- so = select.select((pr,), (), (), conn_timeout)[0]
- os.close(pr)
- if so:
- ret = nwait(cpid, os.WNOHANG)
- if ret != None:
- logging.debug("worker died before establishing connection")
- else:
- logging.debug("worker seems to be connected (?? racy check)")
- while time.time() < t0 + conn_timeout:
- ret = nwait(cpid, os.WNOHANG)
- if ret != None:
- logging.debug("worker died in startup phase")
- break
- time.sleep(1)
- else:
- logging.debug("worker not confirmed in %d sec, aborting it" % \
- conn_timeout)
- os.kill(cpid, SIGKILL)
- ret = nwait(cpid)
- if ret == None:
- self.set_state('OK')
- ret = nwait(cpid)
- elif ret in (0, 1):
- self.set_state('faulty')
- time.sleep(10)
- self.set_state('inconsistent')
- return ret
-
-def monitor():
- return Monitor().monitor()
diff --git a/xlators/features/marker/utils/syncdaemon/repce.py b/xlators/features/marker/utils/syncdaemon/repce.py
deleted file mode 100644
index 47691301e..000000000
--- a/xlators/features/marker/utils/syncdaemon/repce.py
+++ /dev/null
@@ -1,162 +0,0 @@
-import os
-import sys
-import select
-import time
-import logging
-from threading import Condition
-try:
- import thread
-except ImportError:
- # py 3
- import _thread as thread
-try:
- from Queue import Queue
-except ImportError:
- # py 3
- from queue import Queue
-try:
- import cPickle as pickle
-except ImportError:
- # py 3
- import pickle
-
-from syncdutils import Thread
-
-pickle_proto = -1
-repce_version = 1.0
-
-def ioparse(i, o):
- if isinstance(i, int):
- i = os.fdopen(i)
- # rely on duck typing for recognizing
- # streams as that works uniformly
- # in py2 and py3
- if hasattr(o, 'fileno'):
- o = o.fileno()
- return (i, o)
-
-def send(out, *args):
- os.write(out, pickle.dumps(args, pickle_proto))
-
-def recv(inf):
- return pickle.load(inf)
-
-
-class RepceServer(object):
-
- def __init__(self, obj, i, o, wnum=6):
- self.obj = obj
- self.inf, self.out = ioparse(i, o)
- self.wnum = wnum
- self.q = Queue()
-
- def service_loop(self):
- for i in range(self.wnum):
- t = Thread(target=self.worker)
- t.start()
- try:
- while True:
- self.q.put(recv(self.inf))
- except EOFError:
- logging.info("terminating on reaching EOF.")
-
- def worker(self):
- while True:
- in_data = self.q.get(True)
- rid = in_data[0]
- rmeth = in_data[1]
- exc = False
- if rmeth == '__repce_version__':
- res = repce_version
- else:
- try:
- res = getattr(self.obj, rmeth)(*in_data[2:])
- except:
- res = sys.exc_info()[1]
- exc = True
- logging.exception("call failed: ")
- send(self.out, rid, exc, res)
-
-
-class RepceJob(object):
-
- def __init__(self, cbk):
- self.rid = (os.getpid(), thread.get_ident(), time.time())
- self.cbk = cbk
- self.lever = Condition()
- self.done = False
-
- def __repr__(self):
- return ':'.join([str(x) for x in self.rid])
-
- def wait(self):
- self.lever.acquire()
- if not self.done:
- self.lever.wait()
- self.lever.release()
- return self.result
-
- def wakeup(self, data):
- self.result = data
- self.lever.acquire()
- self.done = True
- self.lever.notify()
- self.lever.release()
-
-
-class RepceClient(object):
-
- def __init__(self, i, o):
- self.inf, self.out = ioparse(i, o)
- self.jtab = {}
- t = Thread(target = self.listen)
- t.start()
-
- def listen(self):
- while True:
- select.select((self.inf,), (), ())
- rid, exc, res = recv(self.inf)
- rjob = self.jtab.pop(rid)
- if rjob.cbk:
- rjob.cbk(rjob, [exc, res])
-
- def push(self, meth, *args, **kw):
- cbk = kw.get('cbk')
- if not cbk:
- def cbk(rj, res):
- if res[0]:
- raise res[1]
- rjob = RepceJob(cbk)
- self.jtab[rjob.rid] = rjob
- logging.debug("call %s %s%s ..." % (repr(rjob), meth, repr(args)))
- send(self.out, rjob.rid, meth, *args)
- return rjob
-
- def __call__(self, meth, *args):
- rjob = self.push(meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
- exc, res = rjob.wait()
- if exc:
- logging.error('call %s (%s) failed on peer with %s' % (repr(rjob), meth, str(type(res).__name__)))
- raise res
- logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res)))
- return res
-
- class mprx(object):
-
- def __init__(self, ins, meth):
- self.ins = ins
- self.meth = meth
-
- def __call__(self, *a):
- return self.ins(self.meth, *a)
-
- def __getattr__(self, meth):
- return self.mprx(self, meth)
-
- def __version__(self):
- d = {'proto': self('__repce_version__')}
- try:
- d['object'] = self('version')
- except AttributeError:
- pass
- return d
diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py
deleted file mode 100644
index 30011b3d3..000000000
--- a/xlators/features/marker/utils/syncdaemon/resource.py
+++ /dev/null
@@ -1,476 +0,0 @@
-import re
-import os
-import sys
-import pwd
-import stat
-import time
-import errno
-import struct
-import select
-import socket
-import logging
-import tempfile
-from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EISDIR
-
-from gconf import gconf
-import repce
-from repce import RepceServer, RepceClient
-from master import GMaster
-import syncdutils
-from syncdutils import GsyncdError
-
-UrlRX = re.compile('\A(\w+)://(.*)')
-HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I)
-UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+")
-
-def sup(x, *a, **kw):
- return getattr(super(type(x), x), sys._getframe(1).f_code.co_name)(*a, **kw)
-
-def desugar(ustr):
- m = re.match('([^:]*):(.*)', ustr)
- if m:
- if not m.groups()[0]:
- return "gluster://localhost" + ustr
- elif '@' in m.groups()[0] or re.search('[:/]', m.groups()[1]):
- return "ssh://" + ustr
- else:
- return "gluster://" + ustr
- else:
- if ustr[0] != '/':
- raise GsyncdError("cannot resolve sugared url '%s'" % ustr)
- ap = os.path.normpath(ustr)
- if ap.startswith('//'):
- ap = ap[1:]
- return "file://" + ap
-
-def gethostbyname(hnam):
- try:
- return socket.gethostbyname(hnam)
- except socket.gaierror:
- ex = sys.exc_info()[1]
- raise GsyncdError("failed to resolve %s: %s" % \
- (hnam, ex.strerror))
-
-def parse_url(ustr):
- m = UrlRX.match(ustr)
- if not m:
- ustr = desugar(ustr)
- m = UrlRX.match(ustr)
- if not m:
- raise GsyncdError("malformed url")
- sch, path = m.groups()
- this = sys.modules[__name__]
- if not hasattr(this, sch.upper()):
- raise GsyncdError("unknown url scheme " + sch)
- return getattr(this, sch.upper())(path)
-
-
-class _MetaXattr(object):
-
- # load Xattr stuff on-demand
-
- def __getattr__(self, meth):
- from libcxattr import Xattr as LXattr
- xmeth = [ m for m in dir(LXattr) if m[0] != '_' ]
- if not meth in xmeth:
- return
- for m in xmeth:
- setattr(self, m, getattr(LXattr, m))
- return getattr(self, meth)
-
-Xattr = _MetaXattr()
-
-
-class Server(object):
-
- GX_NSPACE = "trusted.glusterfs"
- NTV_FMTSTR = "!" + "B"*19 + "II"
- FRGN_XTRA_FMT = "I"
- FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
-
- @staticmethod
- def entries(path):
- # prevent symlinks being followed
- if not stat.S_ISDIR(os.lstat(path).st_mode):
- raise OSError(ENOTDIR, os.strerror(ENOTDIR))
- return os.listdir(path)
-
- @classmethod
- def purge(cls, path, entries=None):
- me_also = entries == None
- if not entries:
- try:
- # if it's a symlink, prevent
- # following it
- try:
- os.unlink(path)
- return
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == EISDIR:
- entries = os.listdir(path)
- else:
- raise
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno in (ENOTDIR, ENOENT, ELOOP):
- try:
- os.unlink(path)
- return
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- return
- raise
- else:
- raise
- for e in entries:
- cls.purge(os.path.join(path, e))
- if me_also:
- os.rmdir(path)
-
- @classmethod
- def _create(cls, path, ctor):
- try:
- ctor(path)
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == EEXIST:
- cls.purge(path)
- return ctor(path)
- raise
-
- @classmethod
- def mkdir(cls, path):
- cls._create(path, os.mkdir)
-
- @classmethod
- def symlink(cls, lnk, path):
- cls._create(path, lambda p: os.symlink(lnk, p))
-
- @classmethod
- def xtime(cls, path, uuid):
- try:
- return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), 8))
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno in (ENOENT, ENODATA, ENOTDIR):
- return ex.errno
- else:
- raise
-
- @classmethod
- def set_xtime(cls, path, uuid, mark):
- Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark))
-
- @staticmethod
- def setattr(path, adct):
- own = adct.get('own')
- if own:
- os.lchown(path, *own)
- mode = adct.get('mode')
- if mode:
- os.chmod(path, stat.S_IMODE(mode))
- times = adct.get('times')
- if times:
- os.utime(path, times)
-
- @staticmethod
- def pid():
- return os.getpid()
-
- last_keep_alive = 0
- @classmethod
- def keep_alive(cls, dct):
- if dct:
- key = '.'.join([cls.GX_NSPACE, 'volume-mark', dct['uuid']])
- val = struct.pack(cls.FRGN_FMTSTR,
- *(dct['version'] +
- tuple(int(x,16) for x in re.findall('(?:[\da-f]){2}', dct['uuid'])) +
- (dct['retval'],) + dct['volume_mark'][0:2] + (dct['timeout'],)))
- Xattr.lsetxattr('.', key, val)
- cls.last_keep_alive += 1
- return cls.last_keep_alive
-
- @staticmethod
- def version():
- return 1.0
-
-
-class SlaveLocal(object):
-
- def can_connect_to(self, remote):
- return not remote
-
- def service_loop(self):
- repce = RepceServer(self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs))
- t = syncdutils.Thread(target=lambda: (repce.service_loop(),
- syncdutils.finalize()))
- t.start()
- logging.info("slave listening")
- if gconf.timeout and int(gconf.timeout) > 0:
- while True:
- lp = self.server.last_keep_alive
- time.sleep(int(gconf.timeout))
- if lp == self.server.last_keep_alive:
- logging.info("connection inactive for %d seconds, stopping" % int(gconf.timeout))
- break
- else:
- select.select((), (), ())
-
-class SlaveRemote(object):
-
- def connect_remote(self, rargs=[], **opts):
- slave = opts.get('slave', self.url)
- ix, ox = os.pipe()
- iy, oy = os.pipe()
- pid = os.fork()
- if not pid:
- os.close(ox)
- os.dup2(ix, sys.stdin.fileno())
- os.close(iy)
- os.dup2(oy, sys.stdout.fileno())
- so = getattr(gconf, 'session_owner', None)
- if so:
- so_args = ['--session-owner', so]
- else:
- so_args = []
- argv = rargs + gconf.remote_gsyncd.split() + so_args + \
- ['-N', '--listen', '--timeout', str(gconf.timeout), slave]
- os.execvp(argv[0], argv)
- os.close(ix)
- os.close(oy)
- return self.start_fd_client(iy, ox, **opts)
-
- def start_fd_client(self, i, o, **opts):
- self.server = RepceClient(i, o)
- rv = self.server.__version__()
- exrv = {'proto': repce.repce_version, 'object': Server.version()}
- da0 = (rv, exrv)
- da1 = ({}, {})
- for i in range(2):
- for k, v in da0[i].iteritems():
- da1[i][k] = int(v)
- if da1[0] != da1[1]:
- raise GsyncdError("RePCe major version mismatch: local %s, remote %s" % (exrv, rv))
-
- def rsync(self, files, *args):
- if not files:
- raise GsyncdError("no files to sync")
- logging.debug("files: " + ", ".join(files))
- argv = gconf.rsync_command.split() + gconf.rsync_extra.split() + ['-aR'] + files + list(args)
- return os.spawnvp(os.P_WAIT, argv[0], argv) == 0
-
-
-class AbstractUrl(object):
-
- def __init__(self, path, pattern):
- m = re.search(pattern, path)
- if not m:
- raise GsyncdError("malformed path")
- self.path = path
- return m.groups()
-
- @property
- def scheme(self):
- return type(self).__name__.lower()
-
- def canonical_path(self):
- return self.path
-
- def get_url(self, canonical=False, escaped=False):
- if canonical:
- pa = self.canonical_path()
- else:
- pa = self.path
- u = "://".join((self.scheme, pa))
- if escaped:
- u = syncdutils.escape(u)
- return u
-
- @property
- def url(self):
- return self.get_url()
-
-
- ### Concrete resource classes ###
-
-
-class FILE(AbstractUrl, SlaveLocal, SlaveRemote):
-
- class FILEServer(Server):
- pass
-
- server = FILEServer
-
- def __init__(self, path):
- sup(self, path, '^/')
-
- def connect(self):
- os.chdir(self.path)
-
- def rsync(self, files):
- return sup(self, files, self.path)
-
-
-class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
-
- class GLUSTERServer(Server):
-
- @classmethod
- def _attr_unpack_dict(cls, xattr, extra_fields = ''):
- fmt_string = cls.NTV_FMTSTR + extra_fields
- buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string))
- vm = struct.unpack(fmt_string, buf)
- m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in vm[2:18]]))
- uuid = '-'.join(m.groups())
- volinfo = { 'version': vm[0:2],
- 'uuid' : uuid,
- 'retval' : vm[18],
- 'volume_mark': vm[19:21],
- }
- if extra_fields:
- return volinfo, vm[-len(extra_fields):]
- else:
- return volinfo
-
- @classmethod
- def foreign_volume_infos(cls):
- dict_list = []
- xattr_list = Xattr.llistxattr_buf('.')
- for ele in xattr_list:
- if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0:
- d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT)
- now = int(time.time())
- if x[0] > now:
- logging.debug("volinfo[%s] expires: %d (%d sec later)" % \
- (d['uuid'], x[0], x[0] - now))
- dict_list.append(d)
- else:
- try:
- Xattr.lremovexattr('.', ele)
- except OSError:
- pass
- return dict_list
-
- @classmethod
- def native_volume_info(cls):
- try:
- return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE, 'volume-mark']))
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno != ENODATA:
- raise
-
- server = GLUSTERServer
-
- def __init__(self, path):
- self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern)
-
- def canonical_path(self):
- return ':'.join([gethostbyname(self.host), self.volume])
-
- def can_connect_to(self, remote):
- return True
-
- def connect(self):
- def umount_l(d):
- argv = ['umount', '-l', d]
- return os.spawnvp(os.P_WAIT, argv[0], argv)
- d = tempfile.mkdtemp(prefix='gsyncd-aux-mount-')
- mounted = False
- try:
- argv = gconf.gluster_command.split() + \
- (gconf.gluster_log_level and ['-L', gconf.gluster_log_level] or []) + \
- ['-l', gconf.gluster_log_file, '-s', self.host,
- '--volfile-id', self.volume, '--client-pid=-1', d]
- if os.spawnvp(os.P_WAIT, argv[0], argv):
- raise GsyncdError("command failed: " + " ".join(argv))
- mounted = True
- logging.debug('auxiliary glusterfs mount in place')
- os.chdir(d)
- if umount_l(d) != 0:
- raise GsyncdError("umounting %s failed" % d)
- mounted = False
- finally:
- try:
- if mounted:
- umount_l(d)
- os.rmdir(d)
- except:
- logging.warn('stale mount possibly left behind on ' + d)
- logging.debug('auxiliary glusterfs mount prepared')
-
- def connect_remote(self, *a, **kw):
- sup(self, *a, **kw)
- self.slavedir = "/proc/%d/cwd" % self.server.pid()
-
- def service_loop(self, *args):
- if args:
- GMaster(self, args[0]).crawl_loop()
- else:
- sup(self, *args)
-
- def rsync(self, files):
- return sup(self, files, self.slavedir)
-
-
-class SSH(AbstractUrl, SlaveRemote):
-
- def __init__(self, path):
- self.remote_addr, inner_url = sup(self, path,
- '^((?:%s@)?%s):(.+)' % tuple([ r.pattern for r in (UserRX, HostRX) ]))
- self.inner_rsc = parse_url(inner_url)
-
- def canonical_path(self):
- m = re.match('([^@]+)@(.+)', self.remote_addr)
- if m:
- u, h = m.groups()
- else:
- u, h = pwd.getpwuid(os.geteuid()).pw_name, self.remote_addr
- remote_addr = '@'.join([u, gethostbyname(h)])
- return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)])
-
- def can_connect_to(self, remote):
- return False
-
- def start_fd_client(self, *a, **opts):
- if opts.get('deferred'):
- return a
- sup(self, *a)
- ityp = type(self.inner_rsc)
- if ityp == FILE:
- slavepath = self.inner_rsc.path
- elif ityp == GLUSTER:
- slavepath = "/proc/%d/cwd" % self.server.pid()
- else:
- raise NotImplementedError
- self.slaveurl = ':'.join([self.remote_addr, slavepath])
-
- def connect_remote(self, go_daemon=None):
- if go_daemon == 'done':
- return self.start_fd_client(*self.fd_pair)
- gconf.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'))
- deferred = go_daemon == 'postconn'
- ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], slave=self.inner_rsc.url, deferred=deferred)
- if deferred:
- # send a message to peer so that we can wait for
- # the answer from which we know connection is
- # established and we can proceed with daemonization
- # (doing that too early robs the ssh passwd prompt...)
- # However, we'd better not start the RepceClient
- # before daemonization (that's not preserved properly
- # in daemon), we just do a an ad-hoc linear put/get.
- i, o = ret
- inf = os.fdopen(i)
- repce.send(o, None, '__repce_version__')
- select.select((inf,), (), ())
- repce.recv(inf)
- # hack hack hack: store a global reference to the file
- # to save it from getting GC'd which implies closing it
- gconf.permanent_handles.append(inf)
- self.fd_pair = (i, o)
- return 'should'
-
- def rsync(self, files):
- return sup(self, files, '-ze', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args), self.slaveurl)
diff --git a/xlators/features/marker/utils/syncdaemon/syncdutils.py b/xlators/features/marker/utils/syncdaemon/syncdutils.py
deleted file mode 100644
index a905745f1..000000000
--- a/xlators/features/marker/utils/syncdaemon/syncdutils.py
+++ /dev/null
@@ -1,188 +0,0 @@
-import os
-import sys
-import time
-import fcntl
-import shutil
-import logging
-from threading import Lock, Thread as baseThread
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN
-from signal import SIGTERM, SIGKILL
-from time import sleep
-from cPickle import PickleError
-
-from gconf import gconf
-
-try:
- # py 3
- from urllib import parse as urllib
-except ImportError:
- import urllib
-
-def escape(s):
- return urllib.quote_plus(s)
-
-def unescape(s):
- return urllib.unquote_plus(s)
-
-def norm(s):
- if s:
- return s.replace('-', '_')
-
-def update_file(path, updater, merger = lambda f: True):
- """update a file in a transaction-like manner"""
-
- fr = fw = None
- try:
- fd = os.open(path, os.O_CREAT|os.O_RDWR)
- try:
- fr = os.fdopen(fd, 'r+b')
- except:
- os.close(fd)
- raise
- fcntl.lockf(fr, fcntl.LOCK_EX)
- if not merger(fr):
- return
-
- tmpp = path + '.tmp.' + str(os.getpid())
- fd = os.open(tmpp, os.O_CREAT|os.O_EXCL|os.O_WRONLY)
- try:
- fw = os.fdopen(fd, 'wb', 0)
- except:
- os.close(fd)
- raise
- updater(fw)
- os.fsync(fd)
- os.rename(tmpp, path)
- finally:
- for fx in (fr, fw):
- if fx:
- fx.close()
-
-def grabfile(fname, content=None):
- # damn those messy open() mode codes
- fd = os.open(fname, os.O_CREAT|os.O_RDWR)
- f = os.fdopen(fd, 'r+b', 0)
- try:
- fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
- except:
- ex = sys.exc_info()[1]
- f.close()
- if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
- # cannot grab, it's taken
- return
- raise
- if content:
- try:
- f.truncate()
- f.write(content)
- except:
- f.close()
- raise
- gconf.permanent_handles.append(f)
- return f
-
-def grabpidfile(fname=None, setpid=True):
- if not fname:
- fname = gconf.pid_file
- content = None
- if setpid:
- content = str(os.getpid()) + '\n'
- return grabfile(fname, content=content)
-
-final_lock = Lock()
-
-def finalize(*a, **kw):
- final_lock.acquire()
- if getattr(gconf, 'pid_file', None):
- rm_pidf = gconf.pid_file_owned
- if gconf.cpid:
- # exit path from parent branch of daemonization
- rm_pidf = False
- while True:
- f = grabpidfile(setpid=False)
- if not f:
- # child has already taken over pidfile
- break
- if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid:
- # child has terminated
- rm_pidf = True
- break;
- time.sleep(0.1)
- if rm_pidf:
- try:
- os.unlink(gconf.pid_file)
- except:
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- pass
- else:
- raise
- if gconf.ssh_ctl_dir and not gconf.cpid:
- shutil.rmtree(gconf.ssh_ctl_dir)
- if gconf.log_exit:
- logging.info("exiting.")
- sys.stdout.flush()
- sys.stderr.flush()
- os._exit(kw.get('exval', 0))
-
-def log_raise_exception(excont):
- is_filelog = False
- for h in logging.getLogger().handlers:
- fno = getattr(getattr(h, 'stream', None), 'fileno', None)
- if fno and not os.isatty(fno()):
- is_filelog = True
-
- exc = sys.exc_info()[1]
- if isinstance(exc, SystemExit):
- excont.exval = exc.code or 0
- raise
- else:
- logtag = None
- if isinstance(exc, GsyncdError):
- if is_filelog:
- logging.error(exc.message)
- sys.stderr.write('failure: ' + exc.message + "\n")
- elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \
- ((isinstance(exc, OSError) or isinstance(exc, IOError)) and \
- exc.errno == EPIPE):
- logging.error('connection to peer is broken')
- elif isinstance(exc, OSError) and exc.errno == ENOTCONN:
- logging.error('glusterfs session went down')
- else:
- logtag = "FAIL"
- if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
- logtag = "FULL EXCEPTION TRACE"
- if logtag:
- logging.exception(logtag + ": ")
- sys.stderr.write("failed with %s.\n" % type(exc).__name__)
- excont.exval = 1
- sys.exit(excont.exval)
-
-
-class FreeObject(object):
- """wildcard class for which any attribute can be set"""
-
- def __init__(self, **kw):
- for k,v in kw.iteritems():
- setattr(self, k, v)
-
-class Thread(baseThread):
-
- def __init__(self, *a, **kw):
- tf = kw.get('target')
- if tf:
- def twrap(*aa):
- excont = FreeObject(exval = 0)
- try:
- tf(*aa)
- except:
- try:
- log_raise_exception(excont)
- finally:
- finalize(exval = excont.exval)
- kw['target'] = twrap
- baseThread.__init__(self, *a, **kw)
- self.setDaemon(True)
-
-class GsyncdError(StandardError):
- pass
diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am
index 58cfed0f9..393a7bd08 100644
--- a/xlators/features/path-convertor/src/Makefile.am
+++ b/xlators/features/path-convertor/src/Makefile.am
@@ -2,13 +2,14 @@
xlator_LTLIBRARIES = path-converter.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-path_converter_la_LDFLAGS = -module -avoidversion
+path_converter_la_LDFLAGS = -module -avoid-version
path_converter_la_SOURCES = path.c
path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/path-convertor/src/path-mem-types.h b/xlators/features/path-convertor/src/path-mem-types.h
index c071513b6..77ada8d53 100644
--- a/xlators/features/path-convertor/src/path-mem-types.h
+++ b/xlators/features/path-convertor/src/path-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __PATH_MEM_TYPES_H__
#define __PATH_MEM_TYPES_H__
diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c
index f61630535..5c52e0a8d 100644
--- a/xlators/features/path-convertor/src/path.c
+++ b/xlators/features/path-convertor/src/path.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* TODO: add gf_log to all the cases returning errors */
#ifndef _CONFIG_H
@@ -52,7 +42,7 @@ static char *
name_this_to_that (xlator_t *xl, const char *path, const char *name)
{
path_private_t *priv = xl->private;
- char priv_path[ZR_PATH_MAX] = {0,};
+ char priv_path[PATH_MAX] = {0,};
char *tmp_name = NULL;
int32_t path_len = strlen (path);
int32_t name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN;
@@ -848,8 +838,7 @@ path_setxattr (call_frame_t *frame,
if (tmp_path != loc_path)
GF_FREE (tmp_path);
- if (tmp_name)
- GF_FREE (tmp_name);
+ GF_FREE (tmp_name);
return 0;
}
diff --git a/xlators/protocol/legacy/client/Makefile.am b/xlators/features/protect/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/xlators/protocol/legacy/client/Makefile.am
+++ b/xlators/features/protect/Makefile.am
diff --git a/xlators/features/protect/src/Makefile.am b/xlators/features/protect/src/Makefile.am
new file mode 100644
index 000000000..7eb93f32e
--- /dev/null
+++ b/xlators/features/protect/src/Makefile.am
@@ -0,0 +1,22 @@
+# Builds the three "protect" translators as dlopen-able libtool modules.
+xlator_LTLIBRARIES = prot_dht.la prot_client.la prot_server.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+prot_dht_la_LDFLAGS = -module -avoid-version
+prot_dht_la_SOURCES = prot_dht.c
+prot_dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+prot_client_la_LDFLAGS = -module -avoid-version
+prot_client_la_SOURCES = prot_client.c
+prot_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+prot_server_la_LDFLAGS = -module -avoid-version
+prot_server_la_SOURCES = prot_server.c
+prot_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/protect/src/prot_client.c b/xlators/features/protect/src/prot_client.c
new file mode 100644
index 000000000..a27216d0a
--- /dev/null
+++ b/xlators/features/protect/src/prot_client.c
@@ -0,0 +1,285 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include <execinfo.h>
+
+/* Upper bound on the native stack frames captured by pcli_print_trace. */
+#define NUM_FRAMES 20
+
+/* Extended attribute whose value selects the protection mode. */
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+/*
+ * Protection modes kept in the inode context.  pcli_setxattr stores one on
+ * the inode the xattr is set on; pcli_unlink and pcli_rename look it up on
+ * the parent of the entry being removed or moved.
+ */
+enum {
+        PROT_ACT_NONE = 0,      /* not protected */
+        PROT_ACT_LOG,           /* log the attempt, then let it proceed */
+        PROT_ACT_REJECT,        /* log the attempt and fail it with EPERM */
+};
+
+/*
+ * Return _gf_true when the xattr value in @data spells exactly @word.
+ *
+ * The value comes directly from the user and might not be NUL-terminated,
+ * so it is compared by length rather than with strcmp.  One trailing NUL
+ * (e.g. when the value was stored as a C string) is tolerated.
+ */
+static gf_boolean_t
+pcli_value_is (data_t *data, const char *word)
+{
+        size_t  wlen = strlen (word);
+        size_t  dlen = 0;
+
+        if (!data->data || (data->len < 0)) {
+                return _gf_false;
+        }
+
+        dlen = (size_t) data->len;
+        if ((dlen == wlen + 1) && (data->data[wlen] == '\0')) {
+                dlen = wlen;
+        }
+
+        if ((dlen != wlen) || memcmp (data->data, word, wlen)) {
+                return _gf_false;
+        }
+        return _gf_true;
+}
+
+/*
+ * Log at INFO level, under @name, the chain of translator frames starting
+ * at @frame, followed by the native backtrace of the calling thread.
+ */
+void
+pcli_print_trace (char *name, call_frame_t *frame)
+{
+        void    *frames[NUM_FRAMES];
+        char   **symbols = NULL;
+        int      size = 0;
+        int      i = 0;
+
+        gf_log (name, GF_LOG_INFO, "Translator stack:");
+        for (; frame; frame = frame->next) {
+                /* NOTE(review): wind_from is presumably unset on frames
+                 * that did not come from a wind - confirm.  Either way,
+                 * never hand a NULL pointer to "%s". */
+                gf_log (name, GF_LOG_INFO, "%s (%s)",
+                        frame->wind_from ? frame->wind_from : "(null)",
+                        frame->this->name);
+        }
+
+        size = backtrace (frames, NUM_FRAMES);
+        if (size <= 0) {
+                return;
+        }
+        symbols = backtrace_symbols (frames, size);
+        if (!symbols) {
+                return;
+        }
+
+        gf_log (name, GF_LOG_INFO, "Processor stack:");
+        for (i = 0; i < size; ++i) {
+                gf_log (name, GF_LOG_INFO, "%s", symbols[i]);
+        }
+        /* The symbol array is a single malloc'ed block. */
+        free (symbols);
+}
+
+/*
+ * rename fop.  A rename within one directory, or one whose old parent
+ * carries no protection state, is wound to the child untouched.  Moving an
+ * entry out of a protected parent is logged with a stack trace and, in
+ * reject mode, unwound with EPERM instead of being wound.
+ */
+int32_t
+pcli_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+             loc_t *newloc, dict_t *xdata)
+{
+        uint64_t        value = PROT_ACT_NONE;
+
+        if (newloc->parent == oldloc->parent) {
+                gf_log (this->name, GF_LOG_DEBUG, "rename in same directory");
+                goto simple_wind;
+        }
+        if (!oldloc->parent) {
+                goto simple_wind;
+        }
+        if (inode_ctx_get (oldloc->parent, this, &value) != 0) {
+                goto simple_wind;
+        }
+
+        if (value != PROT_ACT_NONE) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "got rename for protected %s", oldloc->path);
+                pcli_print_trace (this->name, frame->next);
+                if (value == PROT_ACT_REJECT) {
+                        STACK_UNWIND_STRICT (rename, frame, -1, EPERM,
+                                             NULL, NULL, NULL, NULL, NULL,
+                                             xdata);
+                        return 0;
+                }
+        }
+
+simple_wind:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->rename, oldloc, newloc,
+                         xdata);
+        return 0;
+}
+
+/*
+ * setxattr fop.  A request carrying only PROTECT_KEY is consumed here: the
+ * value "log" or "reject" selects that mode, any other value clears the
+ * protection, and the call is unwound with success without reaching the
+ * child.  Every other request is wound to the child unchanged.
+ */
+int32_t
+pcli_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+               int32_t flags, dict_t *xdata)
+{
+        data_t          *data = NULL;
+        uint64_t         value = PROT_ACT_NONE;
+
+        /*
+         * We can't use dict_get_str and strcmp here, because the value comes
+         * directly from the user and might not be NUL-terminated (it would
+         * be if we had set it ourselves).
+         */
+
+        data = dict_get (dict, PROTECT_KEY);
+        if (!data) {
+                goto simple_wind;
+        }
+
+        if (dict->count > 1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "attempted to mix %s with other keys", PROTECT_KEY);
+                goto simple_wind;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG, "got %s request", PROTECT_KEY);
+        /*
+         * Match the whole value: a bare strncmp bounded by data->len would
+         * also accept an empty value or any prefix such as "l" or "rej".
+         */
+        if (pcli_value_is (data, "log")) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "logging removals on %s", loc->path);
+                value = PROT_ACT_LOG;
+        }
+        else if (pcli_value_is (data, "reject")) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "rejecting removals on %s", loc->path);
+                value = PROT_ACT_REJECT;
+        }
+        else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "removing protection on %s", loc->path);
+                value = PROT_ACT_NONE;
+        }
+        /* The stored mode is what pcli_unlink and pcli_rename act on. */
+        if (inode_ctx_set (loc->inode, this, &value) != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set protection status for %s", loc->path);
+        }
+        STACK_UNWIND_STRICT (setxattr, frame, 0, 0, NULL);
+        return 0;
+
+simple_wind:
+        STACK_WIND_TAIL (frame,
+                         FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                         loc, dict, flags, xdata);
+        return 0;
+}
+
+/*
+ * unlink fop.  Removing an entry whose parent carries a protection mode is
+ * logged with a stack trace and, in reject mode, unwound with EPERM; in all
+ * other cases the unlink is wound to the child.
+ */
+int32_t
+pcli_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+             dict_t *xdata)
+{
+        uint64_t        value = PROT_ACT_NONE;
+
+        if (!loc->parent || (inode_ctx_get(loc->parent,this,&value) != 0)) {
+                goto simple_wind;
+        }
+
+        if (value != PROT_ACT_NONE) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "got unlink for protected %s", loc->path);
+                pcli_print_trace (this->name, frame->next);
+                if (value == PROT_ACT_REJECT) {
+                        STACK_UNWIND_STRICT (unlink, frame, -1, EPERM,
+                                             NULL, NULL, NULL);
+                        return 0;
+                }
+        }
+
+simple_wind:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+        return 0;
+}
+
+/*
+ * Translator entry point: refuse to start unless exactly one child is
+ * configured; a missing parent only produces a warning.
+ */
+int32_t
+init (xlator_t *this)
+{
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Nothing was allocated by init, so there is nothing to release. */
+void
+fini (xlator_t *this)
+{
+        return;
+}
+
+
+/* File operations intercepted by this translator. */
+struct xlator_fops fops = {
+        .rename   = pcli_rename,
+        .setxattr = pcli_setxattr,
+        .unlink   = pcli_unlink,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+        { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_dht.c b/xlators/features/protect/src/prot_dht.c
new file mode 100644
index 000000000..feec6ffd6
--- /dev/null
+++ b/xlators/features/protect/src/prot_dht.c
@@ -0,0 +1,211 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+/*
+ * Memory-accounting type for pdht_coord_t allocations.
+ * NOTE(review): no mem_acct_init is defined in this file; presumably a type
+ * beyond gf_common_mt_end needs one to be registered - confirm.
+ */
+enum gf_pdht_mem_types_ {
+        gf_pdht_mt_coord_t = gf_common_mt_end + 1,
+        gf_pdht_mt_end
+};
+
+/*
+ * Shared state of one fanned-out setxattr: counts the initiating call plus
+ * the winds still unanswered, and collects the result to unwind with.
+ */
+typedef struct {
+        pthread_mutex_t lock;       /* taken around updates of the fields */
+        uint16_t        refs;       /* initiator + winds not yet answered */
+        int32_t         op_ret;     /* last non-zero op_ret seen, else 0 */
+        int32_t         op_errno;   /* errno reported with that op_ret */
+        dict_t         *xdata;      /* ref on the last non-NULL reply xdata */
+} pdht_coord_t;
+
+/* Extended attribute that triggers the fan-out. */
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+/*
+ * Drop one reference on @coord and release its lock, which the caller must
+ * hold.  Whoever drops the last reference unwinds the setxattr with the
+ * collected result and destroys the coordinator.
+ */
+void
+pdht_unref_and_unlock (call_frame_t *frame, xlator_t *this,
+                       pdht_coord_t *coord)
+{
+        gf_boolean_t    should_unwind;
+
+        should_unwind = (--(coord->refs) == 0);
+        pthread_mutex_unlock (&coord->lock);
+
+        if (!should_unwind) {
+                return;
+        }
+
+        STACK_UNWIND_STRICT (setxattr, frame,
+                             coord->op_ret, coord->op_errno,
+                             coord->xdata);
+        if (coord->xdata) {
+                dict_unref (coord->xdata);
+        }
+        /* No reference is left, so nobody can take the lock any more. */
+        pthread_mutex_destroy (&coord->lock);
+        GF_FREE (coord);
+}
+
+/*
+ * Reply to one wound setxattr: fold the result into the coordinator passed
+ * as @cookie (a failure overrides earlier results, a non-NULL @xdata
+ * replaces the stored one) and drop that wind's reference.
+ */
+int32_t
+pdht_recurse_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        pdht_coord_t    *coord = cookie;
+
+        pthread_mutex_lock (&coord->lock);
+        if (op_ret) {
+                coord->op_ret = op_ret;
+                coord->op_errno = op_errno;
+        }
+        if (xdata) {
+                if (coord->xdata) {
+                        dict_unref (coord->xdata);
+                }
+                coord->xdata = dict_ref (xdata);
+        }
+        pdht_unref_and_unlock (frame, this, coord);
+
+        return 0;
+}
+
+/*
+ * Walk the translator graph depth-first, starting at @xl, and wind the
+ * setxattr to every translator of type "features/prot_client", taking one
+ * reference on @coord per wind.  The walk stops at a matching translator.
+ */
+void
+pdht_recurse (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+              int32_t flags, dict_t *xdata, xlator_t *xl, pdht_coord_t *coord)
+{
+        xlator_list_t   *iter = NULL;
+
+        if (!strcmp (xl->type, "features/prot_client")) {
+                pthread_mutex_lock (&coord->lock);
+                ++(coord->refs);
+                pthread_mutex_unlock (&coord->lock);
+                STACK_WIND_COOKIE (frame, pdht_recurse_cbk, coord, xl,
+                                   xl->fops->setxattr, loc, dict, flags,
+                                   xdata);
+                return;
+        }
+
+        for (iter = xl->children; iter; iter = iter->next) {
+                pdht_recurse (frame, this, loc, dict, flags, xdata,
+                              iter->xlator, coord);
+        }
+}
+
+/*
+ * setxattr fop.  A request carrying only PROTECT_KEY is fanned out to all
+ * prot_client translators below this one and unwound once all of them have
+ * answered; every other request is wound to the first child unchanged.
+ */
+int32_t
+pdht_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+               int32_t flags, dict_t *xdata)
+{
+        pdht_coord_t    *coord = NULL;
+
+        if (!dict_get (dict, PROTECT_KEY)) {
+                goto simple_wind;
+        }
+
+        if (dict->count > 1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "attempted to mix %s with other keys", PROTECT_KEY);
+                goto simple_wind;
+        }
+
+        coord = GF_CALLOC (1, sizeof (*coord), gf_pdht_mt_coord_t);
+        if (!coord) {
+                gf_log (this->name, GF_LOG_WARNING, "allocation failed");
+                goto simple_wind;
+        }
+
+        pthread_mutex_init (&coord->lock, NULL);
+        /* The initiator's own reference keeps coord alive while winding. */
+        coord->refs = 1;
+        coord->op_ret = 0;
+        coord->xdata = NULL;
+
+        pdht_recurse (frame, this, loc, dict, flags, xdata, this, coord);
+        pthread_mutex_lock (&coord->lock);
+        pdht_unref_and_unlock (frame, this, coord);
+
+        return 0;
+
+simple_wind:
+        STACK_WIND_TAIL (frame,
+                         FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                         loc, dict, flags, xdata);
+        return 0;
+}
+
+/*
+ * Translator entry point: refuse to start unless exactly one child is
+ * configured; a missing parent only produces a warning.
+ */
+int32_t
+init (xlator_t *this)
+{
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Nothing was allocated by init, so there is nothing to release. */
+void
+fini (xlator_t *this)
+{
+        return;
+}
+
+/* Only setxattr is intercepted. */
+struct xlator_fops fops = {
+        .setxattr = pdht_setxattr,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+        { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_server.c b/xlators/features/protect/src/prot_server.c
new file mode 100644
index 000000000..beaee0889
--- /dev/null
+++ b/xlators/features/protect/src/prot_server.c
@@ -0,0 +1,58 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+/*
+ * Translator entry point.  Fails when the translator does not have exactly
+ * one child; merely warns when it has no parent.
+ */
+int32_t
+init (xlator_t *this)
+{
+        xlator_list_t   *kids = this->children;
+
+        if ((kids == NULL) || (kids->next != NULL)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Teardown hook; this translator holds no state to release. */
+void
+fini (xlator_t *this)
+{
+}
+
+
+/* No file operations are intercepted. */
+struct xlator_fops fops = {
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+        { .key = {NULL} },
+};
diff --git a/xlators/features/qemu-block/Makefile.am b/xlators/features/qemu-block/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/xlators/features/qemu-block/Makefile.am
@@ -0,0 +1 @@
+# Recurse into src/, the only subdirectory of this translator.
+SUBDIRS = src
diff --git a/xlators/features/qemu-block/src/Makefile.am b/xlators/features/qemu-block/src/Makefile.am
new file mode 100644
index 000000000..08a7b62a0
--- /dev/null
+++ b/xlators/features/qemu-block/src/Makefile.am
@@ -0,0 +1,155 @@
+if ENABLE_QEMU_BLOCK
+# Build qemu-block.la as a libtool module and install it under the
+# versioned xlator/features directory.
+xlator_LTLIBRARIES = qemu-block.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# Linked against libglusterfs, GLib, zlib and librt.
+qemu_block_la_LDFLAGS = -module -avoid-version
+qemu_block_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GLIB_LIBS) -lz -lrt
+
+# Top-level sources taken from the QEMU tree under $(CONTRIBDIR)/qemu.
+qemu_block_la_SOURCES_qemu = \
+ $(CONTRIBDIR)/qemu/qemu-coroutine.c \
+ $(CONTRIBDIR)/qemu/qemu-coroutine-lock.c \
+ $(CONTRIBDIR)/qemu/qemu-coroutine-sleep.c \
+ $(CONTRIBDIR)/qemu/coroutine-ucontext.c \
+ $(CONTRIBDIR)/qemu/block.c \
+ $(CONTRIBDIR)/qemu/nop-symbols.c
+
+# Utility sources from $(CONTRIBDIR)/qemu/util.
+qemu_block_la_SOURCES_qemu_util = \
+ $(CONTRIBDIR)/qemu/util/aes.c \
+ $(CONTRIBDIR)/qemu/util/bitmap.c \
+ $(CONTRIBDIR)/qemu/util/bitops.c \
+ $(CONTRIBDIR)/qemu/util/cutils.c \
+ $(CONTRIBDIR)/qemu/util/error.c \
+ $(CONTRIBDIR)/qemu/util/hbitmap.c \
+ $(CONTRIBDIR)/qemu/util/iov.c \
+ $(CONTRIBDIR)/qemu/util/module.c \
+ $(CONTRIBDIR)/qemu/util/oslib-posix.c \
+ $(CONTRIBDIR)/qemu/util/qemu-option.c \
+ $(CONTRIBDIR)/qemu/util/qemu-error.c \
+ $(CONTRIBDIR)/qemu/util/qemu-thread-posix.c \
+ $(CONTRIBDIR)/qemu/util/unicode.c \
+ $(CONTRIBDIR)/qemu/util/hexdump.c
+
+# Block-layer sources from $(CONTRIBDIR)/qemu/block: snapshot support plus
+# the qcow2 and qed files.
+qemu_block_la_SOURCES_qemu_block = \
+ $(CONTRIBDIR)/qemu/block/snapshot.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-cache.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-cluster.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-refcount.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-snapshot.c \
+ $(CONTRIBDIR)/qemu/block/qcow2.c \
+ $(CONTRIBDIR)/qemu/block/qed-check.c \
+ $(CONTRIBDIR)/qemu/block/qed-cluster.c \
+ $(CONTRIBDIR)/qemu/block/qed-gencb.c \
+ $(CONTRIBDIR)/qemu/block/qed-l2-cache.c \
+ $(CONTRIBDIR)/qemu/block/qed-table.c \
+ $(CONTRIBDIR)/qemu/block/qed.c
+
+# QObject/JSON sources from $(CONTRIBDIR)/qemu/qobject.
+qemu_block_la_SOURCES_qemu_qobject = \
+ $(CONTRIBDIR)/qemu/qobject/json-lexer.c \
+ $(CONTRIBDIR)/qemu/qobject/json-parser.c \
+ $(CONTRIBDIR)/qemu/qobject/json-streamer.c \
+ $(CONTRIBDIR)/qemu/qobject/qbool.c \
+ $(CONTRIBDIR)/qemu/qobject/qdict.c \
+ $(CONTRIBDIR)/qemu/qobject/qerror.c \
+ $(CONTRIBDIR)/qemu/qobject/qfloat.c \
+ $(CONTRIBDIR)/qemu/qobject/qint.c \
+ $(CONTRIBDIR)/qemu/qobject/qjson.c \
+ $(CONTRIBDIR)/qemu/qobject/qlist.c \
+ $(CONTRIBDIR)/qemu/qobject/qstring.c
+
+# Full source list: the four QEMU source groups defined above followed by
+# the translator's own files in this directory.
+qemu_block_la_SOURCES = \
+ $(qemu_block_la_SOURCES_qemu) \
+ $(qemu_block_la_SOURCES_qemu_util) \
+ $(qemu_block_la_SOURCES_qemu_block) \
+ $(qemu_block_la_SOURCES_qemu_qobject) \
+ bdrv-xlator.c \
+ coroutine-synctask.c \
+ bh-syncop.c \
+ monitor-logging.c \
+ clock-timer.c \
+ qemu-block.c \
+ qb-coroutines.c
+
+# QEMU headers under $(CONTRIBDIR)/qemu that are needed by the build but
+# not installed.  Each header is listed exactly once.
+noinst_HEADERS_qemu = \
+        $(CONTRIBDIR)/qemu/config-host.h \
+        $(CONTRIBDIR)/qemu/qapi-types.h \
+        $(CONTRIBDIR)/qemu/qmp-commands.h \
+        $(CONTRIBDIR)/qemu/trace/generated-tracers.h \
+        $(CONTRIBDIR)/qemu/include/config.h \
+        $(CONTRIBDIR)/qemu/include/glib-compat.h \
+        $(CONTRIBDIR)/qemu/include/qemu-common.h \
+        $(CONTRIBDIR)/qemu/include/trace.h \
+        $(CONTRIBDIR)/qemu/include/block/coroutine.h \
+        $(CONTRIBDIR)/qemu/include/block/aio.h \
+        $(CONTRIBDIR)/qemu/include/block/block.h \
+        $(CONTRIBDIR)/qemu/include/block/block_int.h \
+        $(CONTRIBDIR)/qemu/include/block/blockjob.h \
+        $(CONTRIBDIR)/qemu/include/block/coroutine_int.h \
+        $(CONTRIBDIR)/qemu/include/block/snapshot.h \
+        $(CONTRIBDIR)/qemu/include/exec/cpu-common.h \
+        $(CONTRIBDIR)/qemu/include/exec/hwaddr.h \
+        $(CONTRIBDIR)/qemu/include/exec/poison.h \
+        $(CONTRIBDIR)/qemu/include/fpu/softfloat.h \
+        $(CONTRIBDIR)/qemu/include/migration/migration.h \
+        $(CONTRIBDIR)/qemu/include/migration/qemu-file.h \
+        $(CONTRIBDIR)/qemu/include/migration/vmstate.h \
+        $(CONTRIBDIR)/qemu/include/monitor/monitor.h \
+        $(CONTRIBDIR)/qemu/include/monitor/readline.h \
+        $(CONTRIBDIR)/qemu/include/qapi/error.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/json-lexer.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/json-parser.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/json-streamer.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qbool.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qdict.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qerror.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qfloat.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qint.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qjson.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qlist.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qobject.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/qstring.h \
+        $(CONTRIBDIR)/qemu/include/qapi/qmp/types.h \
+        $(CONTRIBDIR)/qemu/include/qemu/aes.h \
+        $(CONTRIBDIR)/qemu/include/qemu/atomic.h \
+        $(CONTRIBDIR)/qemu/include/qemu/bitmap.h \
+        $(CONTRIBDIR)/qemu/include/qemu/bitops.h \
+        $(CONTRIBDIR)/qemu/include/qemu/bswap.h \
+        $(CONTRIBDIR)/qemu/include/qemu/compiler.h \
+        $(CONTRIBDIR)/qemu/include/qemu/error-report.h \
+        $(CONTRIBDIR)/qemu/include/qemu/event_notifier.h \
+        $(CONTRIBDIR)/qemu/include/qemu/hbitmap.h \
+        $(CONTRIBDIR)/qemu/include/qemu/host-utils.h \
+        $(CONTRIBDIR)/qemu/include/qemu/iov.h \
+        $(CONTRIBDIR)/qemu/include/qemu/main-loop.h \
+        $(CONTRIBDIR)/qemu/include/qemu/module.h \
+        $(CONTRIBDIR)/qemu/include/qemu/notify.h \
+        $(CONTRIBDIR)/qemu/include/qemu/option.h \
+        $(CONTRIBDIR)/qemu/include/qemu/option_int.h \
+        $(CONTRIBDIR)/qemu/include/qemu/osdep.h \
+        $(CONTRIBDIR)/qemu/include/qemu/queue.h \
+        $(CONTRIBDIR)/qemu/include/qemu/sockets.h \
+        $(CONTRIBDIR)/qemu/include/qemu/thread-posix.h \
+        $(CONTRIBDIR)/qemu/include/qemu/thread.h \
+        $(CONTRIBDIR)/qemu/include/qemu/timer.h \
+        $(CONTRIBDIR)/qemu/include/qemu/typedefs.h \
+        $(CONTRIBDIR)/qemu/include/sysemu/sysemu.h \
+        $(CONTRIBDIR)/qemu/include/sysemu/os-posix.h \
+        $(CONTRIBDIR)/qemu/block/qcow2.h \
+        $(CONTRIBDIR)/qemu/block/qed.h
+
+# Headers that are not installed: the QEMU set plus the translator's own.
+noinst_HEADERS = \
+ $(noinst_HEADERS_qemu) \
+ qemu-block.h \
+ qemu-block-memory-types.h \
+ qb-coroutines.h
+
+# Preprocessor flags: libglusterfs headers, the QEMU tree and its include/
+# directory; GLUSTER_XLATOR is defined for the build.
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/qemu \
+ -I$(CONTRIBDIR)/qemu/include \
+ -DGLUSTER_XLATOR
+
+# Strict aliasing is disabled for this library.
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) $(GLIB_CFLAGS)
+
+CLEANFILES =
+
+endif
diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c
new file mode 100644
index 000000000..106c59775
--- /dev/null
+++ b/xlators/features/qemu-block/src/bdrv-xlator.c
@@ -0,0 +1,397 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "inode.h"
+#include "syncop.h"
+#include "qemu-block.h"
+#include "block/block_int.h"
+
+/*
+ * Per-BlockDriverState private data for the "gluster" driver (reached via
+ * bs->opaque).  inode is stored with inode_ref() in qemu_gluster_open() and
+ * released with inode_unref() in qemu_gluster_close().
+ */
+typedef struct BDRVGlusterState {
+ inode_t *inode;
+} BDRVGlusterState;
+
+/*
+ * Option list absorbed by qemu_gluster_open().  It declares a single string
+ * option, "filename", which open parses as either "gluster://gfid:<gfid>"
+ * or the "gluster://inodep:<pointer>" form.
+ */
+static QemuOptsList runtime_opts = {
+ .name = "gluster",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+ .desc = {
+ {
+ .name = "filename",
+ .type = QEMU_OPT_STRING,
+ .help = "GFID of file",
+ },
+ { /* end of list */ }
+ },
+};
+
+/*
+ * Recover the inode pointer embedded in a "gluster://inodep:<pointer>"
+ * filename, i.e. the format written by qb_inode_to_filename().
+ *
+ * Returns the decoded pointer, or NULL when @filename is NULL, does not start
+ * with the expected prefix, or carries no parsable pointer value.  No
+ * reference is taken on the returned inode.
+ */
+inode_t *
+qb_inode_from_filename (const char *filename)
+{
+        static const char prefix[] = "gluster://inodep:";
+        const size_t      prefix_len = sizeof (prefix) - 1;
+        void             *ptr = NULL;
+
+        if (!filename)
+                return NULL;
+
+        /* Only skip the prefix once we know it is really there; a shorter
+         * or different string must not be indexed past its end. */
+        if (strncmp (filename, prefix, prefix_len) != 0)
+                return NULL;
+
+        /* %p stores through a void **, so parse into a void * first. */
+        if (sscanf (filename + prefix_len, "%p", &ptr) != 1)
+                return NULL;
+
+        return ptr;
+}
+
+
+/*
+ * Format @inode's address into @filename (at most @size bytes) as
+ * "gluster://inodep:<pointer>".  Returns the snprintf() result.
+ */
+int
+qb_inode_to_filename (inode_t *inode, char *filename, int size)
+{
+        int written = 0;
+
+        written = snprintf (filename, size, "gluster://inodep:%p", inode);
+
+        return written;
+}
+
+
+/*
+ * Return fd_anonymous() for the inode stored in @bs's driver state.
+ * Callers in this file treat the result as possibly NULL and release it
+ * with fd_unref().
+ */
+static fd_t *
+fd_from_bs (BlockDriverState *bs)
+{
+        BDRVGlusterState *state = bs->opaque;
+        inode_t          *target = state->inode;
+
+        return fd_anonymous (target);
+}
+
+
+/*
+ * Block-driver open hook.
+ *
+ * The "filename" option selects the inode to attach to @bs:
+ *   - "gluster://gfid:<gfid>" names a backing image by gfid; the inode is
+ *     found (or created) in the root inode's table and looked up on the
+ *     first child before being referenced.
+ *   - anything else is handed to qb_inode_from_filename().
+ *
+ * On success a reference to the inode is stored in the driver state and 0 is
+ * returned.  On failure a negative errno value is returned.  The QemuOpts
+ * created here are released on every path.
+ */
+static int
+qemu_gluster_open (BlockDriverState *bs, QDict *options, int bdrv_flags)
+{
+        inode_t          *inode = NULL;
+        BDRVGlusterState *s = bs->opaque;
+        QemuOpts         *opts = NULL;
+        Error            *local_err = NULL;
+        const char       *filename = NULL;
+        char              gfid_str[128];
+        int               ret = 0;
+        qb_conf_t        *conf = THIS->private;
+
+        opts = qemu_opts_create_nofail (&runtime_opts);
+        qemu_opts_absorb_qdict (opts, options, &local_err);
+        if (error_is_set (&local_err)) {
+                qerror_report_err (local_err);
+                error_free (local_err);
+                ret = -EINVAL;
+                goto out;
+        }
+
+        filename = qemu_opt_get (opts, "filename");
+        if (!filename) {
+                ret = -EINVAL;
+                goto out;
+        }
+
+        /*
+         * gfid:<gfid> format means we're opening a backing image.  The
+         * conversion is width-limited to fit gfid_str, and only a real
+         * match (== 1) counts: sscanf() returns EOF, which is non-zero,
+         * on empty input.
+         */
+        if (sscanf (filename, "gluster://gfid:%127s", gfid_str) == 1) {
+                loc_t       loc = {0,};
+                struct iatt buf = {0,};
+                uuid_t      gfid;
+
+                if (uuid_parse (gfid_str, gfid) != 0) {
+                        ret = -EINVAL;
+                        goto out;
+                }
+
+                loc.inode = inode_find (conf->root_inode->table, gfid);
+                if (!loc.inode) {
+                        loc.inode = inode_new (conf->root_inode->table);
+                        if (!loc.inode) {
+                                ret = -ENOMEM;
+                                goto out;
+                        }
+                        uuid_copy (loc.inode->gfid, gfid);
+                }
+
+                uuid_copy (loc.gfid, loc.inode->gfid);
+                if (syncop_lookup (FIRST_CHILD(THIS), &loc, NULL, &buf, NULL,
+                                   NULL)) {
+                        /* save errno before cleanup can overwrite it */
+                        ret = -errno;
+                        loc_wipe (&loc);
+                        goto out;
+                }
+
+                s->inode = inode_ref (loc.inode);
+                loc_wipe (&loc);
+        } else {
+                inode = qb_inode_from_filename (filename);
+                if (!inode) {
+                        ret = -EINVAL;
+                        goto out;
+                }
+
+                s->inode = inode_ref (inode);
+        }
+
+out:
+        /* filename points into opts, so opts is released only here */
+        qemu_opts_del (opts);
+        return ret;
+}
+
+
+/*
+ * Block-driver create hook.
+ *
+ * @filename must be in the form understood by qb_inode_from_filename().
+ * The file is only "formatted" when it is currently empty: if a size option
+ * was supplied, the file is extended to that size rounded down to a whole
+ * number of sectors.
+ *
+ * Returns 0 on success, -EINVAL for an unusable filename, -ENOMEM when no
+ * anonymous fd can be obtained, -EFBIG when the file is not empty, or
+ * -errno from the failing fstat/ftruncate.
+ */
+static int
+qemu_gluster_create (const char *filename, QEMUOptionParameter *options)
+{
+        uint64_t     total_size = 0;    /* requested size in sectors */
+        inode_t     *inode = NULL;
+        fd_t        *fd = NULL;
+        struct iatt  stat = {0, };
+        int          ret = 0;
+
+        inode = qb_inode_from_filename (filename);
+        if (!inode)
+                return -EINVAL;
+
+        while (options && options->name) {
+                if (!strcmp (options->name, BLOCK_OPT_SIZE)) {
+                        total_size = options->value.n / BDRV_SECTOR_SIZE;
+                }
+                options++;
+        }
+
+        fd = fd_anonymous (inode);
+        if (!fd)
+                return -ENOMEM;
+
+        if (syncop_fstat (FIRST_CHILD(THIS), fd, &stat)) {
+                ret = -errno;
+                goto out;
+        }
+
+        if (stat.ia_size) {
+                /* format ONLY if the filesize is 0 bytes */
+                ret = -EFBIG;
+                goto out;
+        }
+
+        if (total_size) {
+                /* ftruncate takes a byte length, so convert back from
+                 * sectors before calling it */
+                if (syncop_ftruncate (FIRST_CHILD(THIS), fd,
+                                      total_size * BDRV_SECTOR_SIZE))
+                        ret = -errno;
+        }
+
+out:
+        /* errno has been captured above, before this cleanup runs */
+        fd_unref (fd);
+        return ret;
+}
+
+
+/*
+ * Block-driver read hook: read @nb_sectors sectors starting at @sector_num
+ * through an anonymous fd and copy the returned vectors into @qiov.
+ *
+ * Returns -EIO when no fd is available, -errno when syncop_readv() fails,
+ * and otherwise the (non-negative) syncop_readv() result.
+ */
+static int
+qemu_gluster_co_readv (BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                       QEMUIOVector *qiov)
+{
+        struct iobref *ref = NULL;
+        struct iovec  *vec = NULL;
+        int            nvec = 0;
+        int            ret = 0;
+        off_t          start = 0;
+        size_t         length = 0;
+        fd_t          *fd = fd_from_bs (bs);
+
+        if (fd == NULL)
+                return -EIO;
+
+        start = sector_num * BDRV_SECTOR_SIZE;
+        length = nb_sectors * BDRV_SECTOR_SIZE;
+
+        ret = syncop_readv (FIRST_CHILD(THIS), fd, length, start, 0,
+                            &vec, &nvec, &ref);
+        if (ret >= 0) {
+                /* hand the data over to the caller's vector */
+                iov_copy (qiov->iov, qiov->niov, vec, nvec);
+        } else {
+                ret = -errno;
+        }
+
+        GF_FREE (vec);
+        if (ref)
+                iobref_unref (ref);
+        fd_unref (fd);
+
+        return ret;
+}
+
+
+/*
+ * Block-driver write hook: gather @qiov into one iobuf and write
+ * @nb_sectors sectors at @sector_num through an anonymous fd.
+ *
+ * Returns -EIO when no fd is available, -ENOMEM when a buffer or iobref
+ * cannot be set up, -errno when syncop_writev() fails, and otherwise the
+ * (non-negative) syncop_writev() result.
+ */
+static int
+qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                        QEMUIOVector *qiov)
+{
+        fd_t          *fd = NULL;
+        off_t          offset = 0;
+        size_t         size = 0;
+        struct iobref *iobref = NULL;
+        struct iobuf  *iobuf = NULL;
+        struct iovec   iov = {0, };
+        int            ret = -ENOMEM;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        offset = sector_num * BDRV_SECTOR_SIZE;
+        size = nb_sectors * BDRV_SECTOR_SIZE;
+
+        iobuf = iobuf_get2 (THIS->ctx->iobuf_pool, size);
+        if (!iobuf)
+                goto out;
+
+        /* On any failure below, iobuf is released exactly once at "out". */
+        iobref = iobref_new ();
+        if (!iobref)
+                goto out;
+
+        if (iobref_add (iobref, iobuf) < 0)
+                goto out;
+
+        iov_unload (iobuf_ptr (iobuf), qiov->iov, qiov->niov); /* *choke!* */
+
+        iov.iov_base = iobuf_ptr (iobuf);
+        iov.iov_len = size;
+
+        ret = syncop_writev (FIRST_CHILD(THIS), fd, &iov, 1, offset, iobref, 0);
+        if (ret < 0)
+                ret = -errno;
+
+out:
+        if (iobuf)
+                iobuf_unref (iobuf);
+        if (iobref)
+                iobref_unref (iobref);
+        fd_unref (fd);
+        return ret;
+}
+
+
+/*
+ * Block-driver flush-to-OS hook: flush through an anonymous fd for the
+ * image's inode.
+ *
+ * Returns the syncop_flush() result on success, -EIO when no fd can be
+ * obtained, and -errno when the flush fails (the convention already used by
+ * qemu_gluster_co_readv/writev).
+ */
+static int
+qemu_gluster_co_flush (BlockDriverState *bs)
+{
+        fd_t *fd = NULL;
+        int   ret = 0;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        ret = syncop_flush (FIRST_CHILD(THIS), fd);
+        if (ret < 0)
+                ret = -errno;   /* capture before fd_unref() */
+
+        fd_unref (fd);
+
+        return ret;
+}
+
+
+/*
+ * Block-driver flush-to-disk hook: fsync through an anonymous fd for the
+ * image's inode.
+ *
+ * Returns the syncop_fsync() result on success, -EIO when no fd can be
+ * obtained, and -errno when the fsync fails (the convention already used by
+ * qemu_gluster_co_readv/writev).
+ */
+static int
+qemu_gluster_co_fsync (BlockDriverState *bs)
+{
+        fd_t *fd = NULL;
+        int   ret = 0;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        ret = syncop_fsync (FIRST_CHILD(THIS), fd, 0);
+        if (ret < 0)
+                ret = -errno;   /* capture before fd_unref() */
+
+        fd_unref (fd);
+
+        return ret;
+}
+
+
+/*
+ * Block-driver truncate hook: set the file length to @offset bytes through
+ * an anonymous fd.
+ *
+ * Returns the syncop_ftruncate() result on success, -EIO when no fd can be
+ * obtained, and -errno when the truncate fails (the convention already used
+ * by qemu_gluster_co_readv/writev).
+ */
+static int
+qemu_gluster_truncate (BlockDriverState *bs, int64_t offset)
+{
+        fd_t *fd = NULL;
+        int   ret = 0;
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -EIO;
+
+        ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, offset);
+        if (ret < 0)
+                ret = -errno;   /* capture before fd_unref() */
+
+        fd_unref (fd);
+
+        return ret;
+}
+
+
+/*
+ * Block-driver length hook: return the file size reported by fstat on an
+ * anonymous fd, or -1 when no fd is available or the fstat fails.
+ */
+static int64_t
+qemu_gluster_getlength (BlockDriverState *bs)
+{
+        fd_t        *fd = NULL;
+        int          ret = 0;
+        struct iatt  iatt = {0, };
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -1;
+
+        ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt);
+
+        /* release the anonymous fd on every path; it is not needed again */
+        fd_unref (fd);
+
+        if (ret < 0)
+                return -1;
+
+        return iatt.ia_size;
+}
+
+
+/*
+ * Block-driver allocated-size hook: return ia_blocks * 512 as reported by
+ * fstat on an anonymous fd, or -1 when no fd is available or the fstat
+ * fails.
+ */
+static int64_t
+qemu_gluster_allocated_file_size (BlockDriverState *bs)
+{
+        fd_t        *fd = NULL;
+        int          ret = 0;
+        struct iatt  iatt = {0, };
+
+        fd = fd_from_bs (bs);
+        if (!fd)
+                return -1;
+
+        ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt);
+
+        /* release the anonymous fd on every path; it is not needed again */
+        fd_unref (fd);
+
+        if (ret < 0)
+                return -1;
+
+        return iatt.ia_blocks * 512;
+}
+
+
+/*
+ * Block-driver close hook: drop the inode reference held in the driver
+ * state (taken in qemu_gluster_open()).
+ */
+static void
+qemu_gluster_close (BlockDriverState *bs)
+{
+        BDRVGlusterState *state = bs->opaque;
+
+        inode_unref (state->inode);
+}
+
+
+/*
+ * Creation options advertised by the driver: only the virtual disk size
+ * (BLOCK_OPT_SIZE), which qemu_gluster_create() reads.  The { NULL } entry
+ * ends the list.
+ */
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { NULL }
+};
+
+
+/*
+ * Driver descriptor for the "gluster" format/protocol, wiring the
+ * qemu_gluster_* functions defined above into the block-driver hooks.
+ * flush_to_os maps to the flush call and flush_to_disk to the fsync call.
+ */
+static BlockDriver bdrv_gluster = {
+ .format_name = "gluster",
+ .protocol_name = "gluster",
+ .instance_size = sizeof(BDRVGlusterState),
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+ .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_co_readv = qemu_gluster_co_readv,
+ .bdrv_co_writev = qemu_gluster_co_writev,
+ .bdrv_co_flush_to_os = qemu_gluster_co_flush,
+ .bdrv_co_flush_to_disk = qemu_gluster_co_fsync,
+ .bdrv_truncate = qemu_gluster_truncate,
+ .create_options = qemu_gluster_create_options,
+};
+
+
+/* Register the gluster driver descriptor with the block layer. */
+static void bdrv_gluster_init(void)
+{
+ bdrv_register(&bdrv_gluster);
+}
+
+
+/* Hook bdrv_gluster_init into block-module initialization via block_init. */
+block_init(bdrv_gluster_init);
diff --git a/xlators/features/qemu-block/src/bh-syncop.c b/xlators/features/qemu-block/src/bh-syncop.c
new file mode 100644
index 000000000..e8686f6d4
--- /dev/null
+++ b/xlators/features/qemu-block/src/bh-syncop.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "block/aio.h"
+
+/* Stub: scheduling a bottom half does nothing in this build. */
+void
+qemu_bh_schedule (QEMUBH *bh)
+{
+        /* no-op */
+}
+
+/* Stub: cancelling a bottom half does nothing in this build. */
+void
+qemu_bh_cancel (QEMUBH *bh)
+{
+        /* no-op */
+}
+
+/* Stub: deleting a bottom half does nothing in this build. */
+void
+qemu_bh_delete (QEMUBH *bh)
+{
+        /* no-op */
+}
+
+/* Stub: no bottom half is ever created; always returns NULL. */
+QEMUBH *
+qemu_bh_new (QEMUBHFunc *cb, void *opaque)
+{
+        QEMUBH *none = NULL;
+
+        return none;
+}
diff --git a/xlators/features/qemu-block/src/clock-timer.c b/xlators/features/qemu-block/src/clock-timer.c
new file mode 100644
index 000000000..fcbec6ad1
--- /dev/null
+++ b/xlators/features/qemu-block/src/clock-timer.c
@@ -0,0 +1,60 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "qemu/timer.h"
+
+/*
+ * Globals defined here, presumably to satisfy references from the QEMU
+ * sources (TODO confirm).  Nothing in this file assigns vm_clock;
+ * use_rt_clock is initialized to 0.
+ */
+QEMUClock *vm_clock;
+int use_rt_clock = 0;
+
+/* Stub: timers are not supported here; always returns NULL. */
+QEMUTimer *qemu_new_timer (QEMUClock *clock, int scale,
+                           QEMUTimerCB *cb, void *opaque)
+{
+        QEMUTimer *none = NULL;
+
+        return none;
+}
+
+/* Stub: the clock value is always reported as 0. */
+int64_t qemu_get_clock_ns (QEMUClock *clock)
+{
+        int64_t now = 0;
+
+        return now;
+}
+
+/* Stub: re-arming a timer does nothing. */
+void qemu_mod_timer (QEMUTimer *ts, int64_t expire_time)
+{
+        /* no-op */
+}
+
+/* Stub: freeing a timer does nothing. */
+void qemu_free_timer (QEMUTimer *ts)
+{
+        /* no-op */
+}
+
+/* Stub: deleting a timer does nothing. */
+void qemu_del_timer (QEMUTimer *ts)
+{
+        /* no-op */
+}
+
+/*
+ * Wake the current synctask and then yield it, giving other work a chance
+ * to run.  Always returns false.
+ */
+bool qemu_aio_wait()
+{
+        synctask_wake (synctask_get());
+        synctask_yield (synctask_get());
+
+        return false;
+}
diff --git a/xlators/features/qemu-block/src/coroutine-synctask.c b/xlators/features/qemu-block/src/coroutine-synctask.c
new file mode 100644
index 000000000..e43988a95
--- /dev/null
+++ b/xlators/features/qemu-block/src/coroutine-synctask.c
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "qemu-block.h"
+
+/*
+ * This code serves as the bridge from the main glusterfs context to the qemu
+ * coroutine context via synctask. We create a single threaded syncenv with a
+ * single synctask responsible for processing a queue of coroutines. The qemu
+ * code invoked from within the synctask function handlers uses the ucontext
+ * coroutine implementation and scheduling logic internal to qemu. This
+ * effectively donates a thread of execution to qemu and its internal coroutine
+ * management.
+ *
+ * NOTE: The existence of concurrent synctasks has proven quite racy with regard
+ * to qemu coroutine management, particularly related to the lifecycle
+ * differences with top-level synctasks and internally created coroutines and
+ * interactions with qemu-internal queues (and locks, in turn). We explicitly
+ * disallow this scenario, via the queue, until it is more well supported.
+ */
+
+/*
+ * State of the single coroutine-processing synctask:
+ *   queue - pending qb_local_t entries, linked through their ->list member
+ *   lock  - taken around every access to queue and task in this file
+ *   task  - set from synctask_create() in qb_coroutine() and reset to NULL
+ *           by qb_synctask_wrap() once the queue is drained
+ */
+static struct {
+ struct list_head queue;
+ gf_lock_t lock;
+ struct synctask *task;
+} qb_co;
+
+/* Prepare qb_co for use: initialize its lock and its (empty) queue. */
+static void
+init_qbco()
+{
+        LOCK_INIT(&qb_co.lock);
+        INIT_LIST_HEAD(&qb_co.queue);
+}
+
+/* Completion callback for the queue-draining synctask; does nothing. */
+static int
+synctask_nop_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+        int status = 0;
+
+        return status;
+}
+
+/*
+ * Synctask body: pop entries off qb_co.queue one at a time and run each
+ * entry's synctask_fn with the lock released.  When the queue is found
+ * empty, qb_co.task is cleared (still under the lock) and 0 is returned.
+ */
+static int
+qb_synctask_wrap (void *opaque)
+{
+        qb_local_t *qb_local = NULL;
+
+        LOCK(&qb_co.lock);
+
+        for (;;) {
+                if (list_empty(&qb_co.queue))
+                        break;
+
+                /* detach the head of the queue */
+                qb_local = list_entry(qb_co.queue.next, qb_local_t, list);
+                list_del_init(&qb_local->list);
+
+                UNLOCK(&qb_co.lock);
+
+                qb_local->synctask_fn(qb_local);
+                /* qb_local is now unwound and gone! */
+
+                LOCK(&qb_co.lock);
+        }
+
+        qb_co.task = NULL;
+
+        UNLOCK(&qb_co.lock);
+
+        return 0;
+}
+
+/*
+ * Queue @fn to be run against @frame's qb_local_t by the coroutine
+ * synctask.
+ *
+ * The function pointer is stored in frame->local, the local is appended to
+ * qb_co.queue, and a draining synctask (qb_synctask_wrap) is created on the
+ * configured syncenv if none is currently recorded in qb_co.task.
+ *
+ * Always returns 0; the synctask_create() result is not checked.
+ */
+int
+qb_coroutine (call_frame_t *frame, synctask_fn_t fn)
+{
+ qb_local_t *qb_local = NULL;
+ qb_conf_t *qb_conf = NULL;
+ static int init = 0;
+
+ qb_local = frame->local;
+ qb_local->synctask_fn = fn;
+ qb_conf = frame->this->private;
+
+ /* NOTE(review): this one-time init is not guarded by any lock; confirm
+  * that callers cannot race here. */
+ if (!init) {
+ init = 1;
+ init_qbco();
+ }
+
+ LOCK(&qb_co.lock);
+
+ /* Task creation and enqueueing happen under the same lock that
+  * qb_synctask_wrap holds when it clears qb_co.task. */
+ if (!qb_co.task)
+ qb_co.task = synctask_create(qb_conf->env, qb_synctask_wrap,
+ synctask_nop_cbk, frame, NULL);
+
+ list_add_tail(&qb_local->list, &qb_co.queue);
+
+ UNLOCK(&qb_co.lock);
+
+ return 0;
+}
diff --git a/xlators/features/qemu-block/src/monitor-logging.c b/xlators/features/qemu-block/src/monitor-logging.c
new file mode 100644
index 000000000..d37c37f0f
--- /dev/null
+++ b/xlators/features/qemu-block/src/monitor-logging.c
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "qemu-block-memory-types.h"
+
+#include "block/block_int.h"
+
+/* Monitor pointer global; never assigned in this file. */
+Monitor *cur_mon;
+
+/* QMP is not supported in this build, so this always answers 0. */
+int
+monitor_cur_is_qmp()
+{
+        int is_qmp = 0;
+
+        return is_qmp;
+}
+
+/* Stub: monitor errors are not recorded here. */
+void
+monitor_set_error (Monitor *mon, QError *qerror)
+{
+        /* NOP here */
+}
+
+
+/*
+ * Format monitor output into a local 4096-byte buffer and emit it through
+ * gf_log at ERROR level under the current translator's name.
+ */
+void
+monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+{
+        char message[4096];
+
+        vsnprintf(message, sizeof(message), fmt, ap);
+
+        gf_log (THIS->name, GF_LOG_ERROR, "%s", message);
+}
diff --git a/xlators/features/qemu-block/src/qb-coroutines.c b/xlators/features/qemu-block/src/qb-coroutines.c
new file mode 100644
index 000000000..7c52adb21
--- /dev/null
+++ b/xlators/features/qemu-block/src/qb-coroutines.c
@@ -0,0 +1,662 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "inode.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "qemu-block-memory-types.h"
+#include "qemu-block.h"
+#include "qb-coroutines.h"
+
+
+/*
+ * Synctask handler: format the image backing local->inode and unwind the
+ * pending stub.
+ *
+ * If the inode context names a backing image (by gfid or by file name) it is
+ * looked up first and passed to bdrv_img_create() as "gluster://gfid:<gfid>".
+ * After the image is created, the format string in local->fmt is stored in
+ * the configured xattr on the file.
+ *
+ * The stub is unwound with (0, 0) on success and with -1 plus an errno value
+ * on any failure.  The function itself always returns 0.
+ */
+int
+qb_format_and_resume (void *opaque)
+{
+        qb_local_t   *local = NULL;
+        call_frame_t *frame = NULL;
+        call_stub_t  *stub = NULL;
+        inode_t      *inode = NULL;
+        char          filename[64];
+        char          base_filename[128];
+        int           use_base = 0;
+        qb_inode_t   *qb_inode = NULL;
+        Error        *local_err = NULL;
+        fd_t         *fd = NULL;
+        dict_t       *xattr = NULL;
+        qb_conf_t    *qb_conf = NULL;
+        int           ret = -1;
+
+        local = opaque;
+        frame = local->frame;
+        stub = local->stub;
+        inode = local->inode;
+        qb_conf = frame->this->private;
+
+        qb_inode_to_filename (inode, filename, sizeof (filename));
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+        /*
+         * See if the caller specified a backing image.
+         */
+        if (!uuid_is_null(qb_inode->backing_gfid) || qb_inode->backing_fname) {
+                loc_t       loc = {0,};
+                char        gfid_str[64];
+                struct iatt buf = {0,};
+                int         failed = 0;
+
+                if (!uuid_is_null(qb_inode->backing_gfid)) {
+                        loc.inode = inode_find(qb_conf->root_inode->table,
+                                               qb_inode->backing_gfid);
+                        if (!loc.inode) {
+                                loc.inode = inode_new(qb_conf->root_inode->table);
+                                if (loc.inode)
+                                        uuid_copy(loc.inode->gfid,
+                                                  qb_inode->backing_gfid);
+                        }
+                        if (loc.inode)
+                                uuid_copy(loc.gfid, loc.inode->gfid);
+                } else if (qb_inode->backing_fname) {
+                        loc.inode = inode_new(qb_conf->root_inode->table);
+                        loc.name = qb_inode->backing_fname;
+                        loc.parent = inode_parent(inode, NULL, NULL);
+                        loc_path(&loc, loc.name);
+                }
+
+                /*
+                 * Lookup the backing image. Verify existence and/or get the
+                 * gfid if we don't already have it.  errno is captured right
+                 * away, before any cleanup call can overwrite it.
+                 */
+                if (!loc.inode) {
+                        failed = 1;
+                        ret = ENOMEM;
+                } else if (syncop_lookup(FIRST_CHILD(frame->this), &loc, NULL,
+                                         &buf, NULL, NULL)) {
+                        failed = 1;
+                        ret = errno;
+                }
+
+                /* The name is consumed here; clear both pointers to it so
+                 * nothing is left referring to freed memory. */
+                loc.name = NULL;
+                GF_FREE(qb_inode->backing_fname);
+                qb_inode->backing_fname = NULL;
+
+                if (failed) {
+                        loc_wipe(&loc);
+                        goto err;
+                }
+
+                uuid_copy(qb_inode->backing_gfid, buf.ia_gfid);
+                loc_wipe(&loc);
+
+                /*
+                 * We pass the filename of the backing image into the qemu block
+                 * subsystem as the associated gfid. This is embedded into the
+                 * clone image and passed along to the gluster bdrv backend when
+                 * the block subsystem needs to operate on the backing image on
+                 * behalf of the clone.
+                 */
+                uuid_unparse(qb_inode->backing_gfid, gfid_str);
+                snprintf(base_filename, sizeof(base_filename),
+                         "gluster://gfid:%s", gfid_str);
+                use_base = 1;
+        }
+
+        bdrv_img_create (filename, qb_inode->fmt,
+                         use_base ? base_filename : NULL, 0, 0, qb_inode->size,
+                         0, &local_err, true);
+
+        if (error_is_set (&local_err)) {
+                gf_log (frame->this->name, GF_LOG_ERROR, "%s",
+                        error_get_pretty (local_err));
+                error_free (local_err);
+                QB_STUB_UNWIND (stub, -1, EIO);
+                return 0;
+        }
+
+        fd = fd_anonymous (inode);
+        if (!fd) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "could not create anonymous fd for %s",
+                        uuid_utoa (inode->gfid));
+                QB_STUB_UNWIND (stub, -1, ENOMEM);
+                return 0;
+        }
+
+        xattr = dict_new ();
+        if (!xattr) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "could not allocate xattr dict for %s",
+                        uuid_utoa (inode->gfid));
+                QB_STUB_UNWIND (stub, -1, ENOMEM);
+                fd_unref (fd);
+                return 0;
+        }
+
+        ret = dict_set_str (xattr, qb_conf->qb_xattr_key, local->fmt);
+        if (ret) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "could not dict_set for %s",
+                        uuid_utoa (inode->gfid));
+                QB_STUB_UNWIND (stub, -1, ENOMEM);
+                fd_unref (fd);
+                dict_unref (xattr);
+                return 0;
+        }
+
+        ret = syncop_fsetxattr (FIRST_CHILD(THIS), fd, xattr, 0);
+        if (ret) {
+                ret = errno;
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "failed to setxattr for %s",
+                        uuid_utoa (inode->gfid));
+                QB_STUB_UNWIND (stub, -1, ret);
+                fd_unref (fd);
+                dict_unref (xattr);
+                return 0;
+        }
+
+        fd_unref (fd);
+        dict_unref (xattr);
+
+        QB_STUB_UNWIND (stub, 0, 0);
+
+        return 0;
+
+err:
+        QB_STUB_UNWIND(stub, -1, ret);
+        return 0;
+}
+
+
+/*
+ * Allocate a BlockDriverState named after @inode's gfid and open it
+ * read-write with the driver for format @fmt, using the
+ * qb_inode_to_filename() name for @inode.
+ *
+ * Returns the opened state on success.  On failure NULL is returned, errno
+ * is set (ENOMEM, EINVAL for an unknown format, or the bdrv_open() error),
+ * and any state allocated here is released again.
+ */
+static BlockDriverState *
+qb_bs_create (inode_t *inode, const char *fmt)
+{
+        char              filename[64];
+        BlockDriverState *bs = NULL;
+        BlockDriver      *drv = NULL;
+        int               op_errno = 0;
+        int               ret = 0;
+
+        bs = bdrv_new (uuid_utoa (inode->gfid));
+        if (!bs) {
+                op_errno = ENOMEM;
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "could not allocate @bdrv for gfid:%s",
+                        uuid_utoa (inode->gfid));
+                goto err;
+        }
+
+        drv = bdrv_find_format (fmt);
+        if (!drv) {
+                op_errno = EINVAL;
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "Unknown file format: %s for gfid:%s",
+                        fmt, uuid_utoa (inode->gfid));
+                goto err;
+        }
+
+        qb_inode_to_filename (inode, filename, sizeof (filename));
+
+        ret = bdrv_open (bs, filename, NULL, BDRV_O_RDWR, drv);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "Unable to bdrv_open() gfid:%s (%s)",
+                        uuid_utoa (inode->gfid), strerror (op_errno));
+                goto err;
+        }
+
+        return bs;
+err:
+        /* do not leak the state allocated above on the failure paths */
+        if (bs)
+                bdrv_delete (bs);
+        /* set errno last so the cleanup cannot overwrite it */
+        errno = op_errno;
+        return NULL;
+}
+
+
+/*
+ * Synctask handler for open: make sure the inode has an opened block state,
+ * bump the inode context's refcnt and resume the stub.  If the block state
+ * cannot be created, the stub is unwound with -1 and errno.  Always
+ * returns 0.
+ */
+int
+qb_co_open (void *opaque)
+{
+        qb_local_t   *local = opaque;
+        call_frame_t *frame = local->frame;
+        call_stub_t  *stub = local->stub;
+        inode_t      *inode = local->inode;
+        qb_inode_t   *qb_inode = NULL;
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+        /* FIXME: we need locks around this when
+           enabling multithreaded syncop/coroutine
+           for qemu-block
+        */
+        if (!qb_inode->bs)
+                qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+
+        if (!qb_inode->bs) {
+                QB_STUB_UNWIND (stub, -1, errno);
+                return 0;
+        }
+
+        qb_inode->refcnt++;
+
+        QB_STUB_RESUME (stub);
+
+        return 0;
+}
+
+
+/*
+ * Synctask handler for writev: write the stub's vector at the stub's offset
+ * through the block layer.  The stub is unwound with the bdrv_pwritev()
+ * result on success, or with -1 and the negated result on failure.  Always
+ * returns 0.
+ */
+int
+qb_co_writev (void *opaque)
+{
+        qb_local_t   *local = opaque;
+        call_frame_t *frame = local->frame;
+        call_stub_t  *stub = local->stub;
+        inode_t      *inode = local->inode;
+        qb_inode_t   *qb_inode = NULL;
+        QEMUIOVector  qiov = {0, };
+        int           ret = 0;
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+        /* FIXME: we need locks around this when
+           enabling multithreaded syncop/coroutine
+           for qemu-block
+        */
+        if (!qb_inode->bs)
+                qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+
+        if (!qb_inode->bs) {
+                QB_STUB_UNWIND (stub, -1, errno);
+                return 0;
+        }
+
+        qemu_iovec_init_external (&qiov, stub->args.vector, stub->args.count);
+
+        ret = bdrv_pwritev (qb_inode->bs, stub->args.offset, &qiov);
+        if (ret < 0) {
+                QB_STUB_UNWIND (stub, -1, -ret);
+                return 0;
+        }
+
+        QB_STUB_UNWIND (stub, ret, 0);
+
+        return 0;
+}
+
+
+/*
+ * Synctask handler for readv: read stub->args.size bytes at
+ * stub->args.offset through the block layer into a fresh iobuf and unwind
+ * the stub with the data.
+ *
+ * Reads starting at or beyond the recorded image size unwind with (0, 0).
+ * Failures unwind with -1 and ENOMEM or the negated bdrv_pread() result.
+ * Always returns 0.
+ */
+int
+qb_co_readv (void *opaque)
+{
+        qb_local_t    *local = NULL;
+        call_frame_t  *frame = NULL;
+        call_stub_t   *stub = NULL;
+        inode_t       *inode = NULL;
+        qb_inode_t    *qb_inode = NULL;
+        struct iobuf  *iobuf = NULL;
+        struct iobref *iobref = NULL;
+        struct iovec   iov = {0, };
+        int            ret = 0;
+
+        local = opaque;
+        frame = local->frame;
+        stub = local->stub;
+        inode = local->inode;
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+        if (!qb_inode->bs) {
+                /* FIXME: we need locks around this when
+                   enabling multithreaded syncop/coroutine
+                   for qemu-block
+                */
+
+                qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+                if (!qb_inode->bs) {
+                        QB_STUB_UNWIND (stub, -1, errno);
+                        return 0;
+                }
+        }
+
+        if (stub->args.offset >= qb_inode->size) {
+                QB_STUB_UNWIND (stub, 0, 0);
+                return 0;
+        }
+
+        iobuf = iobuf_get2 (frame->this->ctx->iobuf_pool, stub->args.size);
+        if (!iobuf) {
+                QB_STUB_UNWIND (stub, -1, ENOMEM);
+                return 0;
+        }
+
+        iobref = iobref_new ();
+        if (!iobref) {
+                QB_STUB_UNWIND (stub, -1, ENOMEM);
+                iobuf_unref (iobuf);
+                return 0;
+        }
+
+        if (iobref_add (iobref, iobuf) < 0) {
+                iobuf_unref (iobuf);
+                iobref_unref (iobref);
+                QB_STUB_UNWIND (stub, -1, ENOMEM);
+                return 0;
+        }
+
+        /*
+         * The iobref now keeps the buffer alive, so drop the reference
+         * obtained from iobuf_get2(); otherwise the buffer is never
+         * returned to the pool.  The pointer stays valid for as long as
+         * iobref is held.
+         */
+        iobuf_unref (iobuf);
+
+        ret = bdrv_pread (qb_inode->bs, stub->args.offset, iobuf_ptr (iobuf),
+                          stub->args.size);
+
+        if (ret < 0) {
+                QB_STUB_UNWIND (stub, -1, -ret);
+                iobref_unref (iobref);
+                return 0;
+        }
+
+        iov.iov_base = iobuf_ptr (iobuf);
+        iov.iov_len = ret;
+
+        stub->args_cbk.vector = iov_dup (&iov, 1);
+        if (!stub->args_cbk.vector) {
+                /* do not report success without a vector to hand back */
+                QB_STUB_UNWIND (stub, -1, ENOMEM);
+                iobref_unref (iobref);
+                return 0;
+        }
+        stub->args_cbk.count = 1;
+        stub->args_cbk.iobref = iobref;
+
+        QB_STUB_UNWIND (stub, ret, 0);
+
+        return 0;
+}
+
+
+/*
+ * Synctask handler for fsync: flush the inode's block state.  The stub is
+ * unwound with the bdrv_flush() result on success, or with -1 and the
+ * negated result on failure.  Always returns 0.
+ */
+int
+qb_co_fsync (void *opaque)
+{
+        qb_local_t   *local = opaque;
+        call_frame_t *frame = local->frame;
+        call_stub_t  *stub = local->stub;
+        inode_t      *inode = local->inode;
+        qb_inode_t   *qb_inode = NULL;
+        int           ret = 0;
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+        /* FIXME: we need locks around this when
+           enabling multithreaded syncop/coroutine
+           for qemu-block
+        */
+        if (!qb_inode->bs)
+                qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+
+        if (!qb_inode->bs) {
+                QB_STUB_UNWIND (stub, -1, errno);
+                return 0;
+        }
+
+        ret = bdrv_flush (qb_inode->bs);
+        if (ret < 0) {
+                QB_STUB_UNWIND (stub, -1, -ret);
+                return 0;
+        }
+
+        QB_STUB_UNWIND (stub, ret, 0);
+
+        return 0;
+}
+
+
+/*
+ * Best-effort update of the configured xattr on @fd to "<fmt>:<offset>".
+ * Nothing is reported to the caller: allocation failures are silently
+ * skipped and the fsetxattr result is ignored.
+ */
+static void
+qb_update_size_xattr (xlator_t *this, fd_t *fd, const char *fmt, off_t offset)
+{
+        char       val[QB_XATTR_VAL_MAX];
+        qb_conf_t *qb_conf = this->private;
+        dict_t    *xattr = NULL;
+
+        snprintf (val, QB_XATTR_VAL_MAX, "%s:%llu",
+                  fmt, (long long unsigned) offset);
+
+        xattr = dict_new ();
+        if (!xattr)
+                return;
+
+        /* only send the xattr when it could be stored in the dict */
+        if (dict_set_str (xattr, qb_conf->qb_xattr_key, val) == 0)
+                syncop_fsetxattr (FIRST_CHILD(this), fd, xattr, 0);
+
+        dict_unref (xattr);
+}
+
+
+/*
+ * Synctask handler for truncate: resize the image to stub->args.offset
+ * through the block layer, record the new length in the inode context and
+ * in the size xattr, and fill the stub's pre/post stat (with ia_size
+ * overridden by the image size).
+ *
+ * The stub is unwound with the bdrv_truncate() result on success, or with
+ * -1 and the negated error when truncation or the follow-up length query
+ * fails.  Always returns 0.
+ */
+int
+qb_co_truncate (void *opaque)
+{
+        qb_local_t   *local = NULL;
+        call_frame_t *frame = NULL;
+        call_stub_t  *stub = NULL;
+        inode_t      *inode = NULL;
+        qb_inode_t   *qb_inode = NULL;
+        int           ret = 0;
+        off_t         offset = 0;
+        xlator_t     *this = NULL;
+
+        this = THIS;
+
+        local = opaque;
+        frame = local->frame;
+        stub = local->stub;
+        inode = local->inode;
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+        if (!qb_inode->bs) {
+                /* FIXME: we need locks around this when
+                   enabling multithreaded syncop/coroutine
+                   for qemu-block
+                */
+
+                qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+                if (!qb_inode->bs) {
+                        QB_STUB_UNWIND (stub, -1, errno);
+                        return 0;
+                }
+        }
+
+        syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.prestat);
+        stub->args_cbk.prestat.ia_size = qb_inode->size;
+
+        ret = bdrv_truncate (qb_inode->bs, stub->args.offset);
+        if (ret < 0)
+                goto out;
+
+        offset = bdrv_getlength (qb_inode->bs);
+        if (offset < 0) {
+                /* never record a negative error code as the image size */
+                ret = offset;
+                goto out;
+        }
+
+        qb_inode->size = offset;
+
+        syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.poststat);
+        stub->args_cbk.poststat.ia_size = qb_inode->size;
+
+        qb_update_size_xattr (this, local->fd, qb_inode->fmt, qb_inode->size);
+
+out:
+        if (ret < 0) {
+                QB_STUB_UNWIND (stub, -1, -ret);
+        } else {
+                QB_STUB_UNWIND (stub, ret, 0);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Synctask handler for close: drop one count from the inode context's
+ * refcnt and delete the block state when it reaches zero, then free the
+ * local and destroy the call stack.  Always returns 0.
+ */
+int
+qb_co_close (void *opaque)
+{
+        qb_local_t       *local = opaque;
+        inode_t          *inode = local->inode;
+        call_frame_t     *frame = NULL;
+        qb_inode_t       *qb_inode = NULL;
+        BlockDriverState *bs = NULL;
+
+        qb_inode = qb_inode_ctx_get (THIS, inode);
+
+        qb_inode->refcnt--;
+        if (qb_inode->refcnt == 0) {
+                /* last user gone: detach the state before deleting it */
+                bs = qb_inode->bs;
+                qb_inode->bs = NULL;
+                bdrv_delete (bs);
+        }
+
+        frame = local->frame;
+        frame->local = NULL;
+        qb_local_free (THIS, local);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * Synctask handler: create a snapshot named local->name, stamped with the
+ * current time of day.  The stub is unwound with the bdrv_snapshot_create()
+ * result on success, or with -1 and the negated result on failure.  Always
+ * returns 0.
+ */
+int
+qb_snapshot_create (void *opaque)
+{
+        qb_local_t       *local = opaque;
+        call_frame_t     *frame = local->frame;
+        call_stub_t      *stub = local->stub;
+        inode_t          *inode = local->inode;
+        qb_inode_t       *qb_inode = NULL;
+        QEMUSnapshotInfo  sn;
+        struct timeval    tv = {0, };
+        int               ret = 0;
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+        /* FIXME: we need locks around this when
+           enabling multithreaded syncop/coroutine
+           for qemu-block
+        */
+        if (!qb_inode->bs)
+                qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+
+        if (!qb_inode->bs) {
+                QB_STUB_UNWIND (stub, -1, errno);
+                return 0;
+        }
+
+        memset (&sn, 0, sizeof (sn));
+        pstrcpy (sn.name, sizeof(sn.name), local->name);
+        gettimeofday (&tv, NULL);
+        sn.date_sec = tv.tv_sec;
+        sn.date_nsec = tv.tv_usec * 1000;
+
+        ret = bdrv_snapshot_create (qb_inode->bs, &sn);
+        if (ret < 0) {
+                QB_STUB_UNWIND (stub, -1, -ret);
+                return 0;
+        }
+
+        QB_STUB_UNWIND (stub, ret, 0);
+
+        return 0;
+}
+
+
+/*
+ * Synctask handler: delete the snapshot named local->name.  The stub is
+ * unwound with the bdrv_snapshot_delete() result on success, or with -1 and
+ * the negated result on failure.  Always returns 0.
+ */
+int
+qb_snapshot_delete (void *opaque)
+{
+        qb_local_t   *local = opaque;
+        call_frame_t *frame = local->frame;
+        call_stub_t  *stub = local->stub;
+        inode_t      *inode = local->inode;
+        qb_inode_t   *qb_inode = NULL;
+        int           ret = 0;
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+        /* FIXME: we need locks around this when
+           enabling multithreaded syncop/coroutine
+           for qemu-block
+        */
+        if (!qb_inode->bs)
+                qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+
+        if (!qb_inode->bs) {
+                QB_STUB_UNWIND (stub, -1, errno);
+                return 0;
+        }
+
+        ret = bdrv_snapshot_delete (qb_inode->bs, local->name);
+        if (ret < 0) {
+                QB_STUB_UNWIND (stub, -1, -ret);
+                return 0;
+        }
+
+        QB_STUB_UNWIND (stub, ret, 0);
+
+        return 0;
+}
+
+
+/*
+ * Synctask handler: switch the image to the snapshot named local->name.
+ * The stub is unwound with the bdrv_snapshot_goto() result on success, or
+ * with -1 and the negated result on failure.  Always returns 0.
+ */
+int
+qb_snapshot_goto (void *opaque)
+{
+        qb_local_t   *local = opaque;
+        call_frame_t *frame = local->frame;
+        call_stub_t  *stub = local->stub;
+        inode_t      *inode = local->inode;
+        qb_inode_t   *qb_inode = NULL;
+        int           ret = 0;
+
+        qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+        /* FIXME: we need locks around this when
+           enabling multithreaded syncop/coroutine
+           for qemu-block
+        */
+        if (!qb_inode->bs)
+                qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+
+        if (!qb_inode->bs) {
+                QB_STUB_UNWIND (stub, -1, errno);
+                return 0;
+        }
+
+        ret = bdrv_snapshot_goto (qb_inode->bs, local->name);
+        if (ret < 0) {
+                QB_STUB_UNWIND (stub, -1, -ret);
+                return 0;
+        }
+
+        QB_STUB_UNWIND (stub, ret, 0);
+
+        return 0;
+}
diff --git a/xlators/features/qemu-block/src/qb-coroutines.h b/xlators/features/qemu-block/src/qb-coroutines.h
new file mode 100644
index 000000000..583319f3b
--- /dev/null
+++ b/xlators/features/qemu-block/src/qb-coroutines.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QB_COROUTINES_H
+#define __QB_COROUTINES_H
+
+#include "syncop.h"
+#include "call-stub.h"
+#include "block/block_int.h"
+#include "monitor/monitor.h"
+
+/*
+ * Entry points that the fops in qemu-block.c hand to qb_coroutine().
+ * Each one receives its request state through the opaque pointer
+ * (presumably always the frame's qb_local_t -- confirm against
+ * qb-coroutines.c).
+ */
+int qb_format_and_resume (void *opaque);
+int qb_snapshot_create (void *opaque);
+int qb_snapshot_delete (void *opaque);
+int qb_snapshot_goto (void *opaque);
+int qb_co_open (void *opaque);
+int qb_co_close (void *opaque);
+int qb_co_writev (void *opaque);
+int qb_co_readv (void *opaque);
+int qb_co_fsync (void *opaque);
+int qb_co_truncate (void *opaque);
+
+#endif /* __QB_COROUTINES_H */
diff --git a/xlators/features/qemu-block/src/qemu-block-memory-types.h b/xlators/features/qemu-block/src/qemu-block-memory-types.h
new file mode 100644
index 000000000..267b3893f
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block-memory-types.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __QB_MEM_TYPES_H__
+#define __QB_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/* Memory-accounting type ids of the qemu-block translator.  Numbering
+   continues right after the common libglusterfs ids; gf_qb_mt_end is the
+   terminator used when sizing the accounting table. */
+enum gf_qb_mem_types_ {
+ gf_qb_mt_qb_conf_t = gf_common_mt_end + 1,
+ gf_qb_mt_qb_inode_t,
+ gf_qb_mt_qb_local_t,
+ gf_qb_mt_coroutinesynctask_t,
+ gf_qb_mt_end
+};
+#endif
+
diff --git a/xlators/features/qemu-block/src/qemu-block.c b/xlators/features/qemu-block/src/qemu-block.c
new file mode 100644
index 000000000..48bbf3140
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block.c
@@ -0,0 +1,1140 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "inode.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "qemu-block-memory-types.h"
+#include "qemu-block.h"
+#include "qb-coroutines.h"
+
+
+/*
+ * Fetch this translator's qb_inode_t from the inode context using the
+ * lock-free __inode_ctx_get() variant (the locked wrapper
+ * qb_inode_ctx_get() takes inode->lock around this call).
+ * The slot is pre-zeroed, so an untouched slot converts to NULL.
+ */
+qb_inode_t *
+__qb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+        uint64_t slot = 0;
+
+        __inode_ctx_get (inode, this, &slot);
+
+        return (qb_inode_t *)(unsigned long) slot;
+}
+
+
+/*
+ * Locked lookup of the qb_inode_t attached to an inode.
+ * Returns whatever __qb_inode_ctx_get() yields while inode->lock is held.
+ */
+qb_inode_t *
+qb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+        qb_inode_t *ctx = NULL;
+
+        LOCK (&inode->lock);
+        ctx = __qb_inode_ctx_get (this, inode);
+        UNLOCK (&inode->lock);
+
+        return ctx;
+}
+
+
+/*
+ * Detach this translator's context from the inode via inode_ctx_del()
+ * and hand the former value back to the caller as a qb_inode_t pointer.
+ * The slot is pre-zeroed, so an untouched slot converts to NULL.
+ */
+qb_inode_t *
+qb_inode_ctx_del (xlator_t *this, inode_t *inode)
+{
+        uint64_t slot = 0;
+
+        inode_ctx_del (inode, this, &slot);
+
+        return (qb_inode_t *)(unsigned long) slot;
+}
+
+
+/*
+ * Remove and free the qb_inode_t attached to an inode, if there is one.
+ *
+ * warn: when non-zero, log that the inode is no longer block formatted.
+ * Always returns 0.
+ */
+int
+qb_inode_cleanup (xlator_t *this, inode_t *inode, int warn)
+{
+        qb_inode_t *qb_inode = NULL;
+
+        qb_inode = qb_inode_ctx_del (this, inode);
+
+        if (!qb_inode)
+                return 0;
+
+        if (warn)
+                gf_log (this->name, GF_LOG_WARNING,
+                        "inode %s no longer block formatted",
+                        uuid_utoa (inode->gfid));
+
+        /* free (qb_inode->bs); */
+
+        /* backing_fname is a gf_strdup()'d copy made by
+           qb_format_extract(); release it with the context so that it
+           is not leaked.  GF_FREE() accepts NULL. */
+        GF_FREE (qb_inode->backing_fname);
+        GF_FREE (qb_inode);
+
+        return 0;
+}
+
+
+/*
+ * Overwrite iatt->ia_size with the virtual size recorded in the inode's
+ * qb_inode_t.  Inodes without a block-format context are left untouched.
+ *
+ * iatt may be NULL: error replies in this translator are unwound with
+ * null iatt arguments, and the stat/setattr callbacks forward whatever
+ * they received.  Always returns 0.
+ */
+int
+qb_iatt_fixup (xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+        qb_inode_t *qb_inode = NULL;
+
+        if (!iatt)
+                return 0;
+
+        qb_inode = qb_inode_ctx_get (this, inode);
+        if (!qb_inode)
+                return 0;
+
+        iatt->ia_size = qb_inode->size;
+
+        return 0;
+}
+
+
+/*
+ * Parse a block-format xattr value and record the result in the inode's
+ * qb_inode_t, allocating the context if the inode has none yet.
+ *
+ * format has the shape "fmt:size[:backing]", where backing is either
+ * "<gfid:UUID>" or a filename.  Only the first QB_XATTR_VAL_MAX bytes of
+ * format are examined.
+ *
+ * Returns 0 on success, EINVAL for a malformed value and ENOMEM when an
+ * allocation fails.
+ */
+int
+qb_format_extract (xlator_t *this, char *format, inode_t *inode)
+{
+        char       *s         = NULL;
+        char       *save      = NULL;
+        uint64_t    size      = 0;
+        char        fmt[QB_XATTR_VAL_MAX+1] = {0, };
+        qb_inode_t *qb_inode  = NULL;
+        char       *formatstr = NULL;
+        uuid_t      gfid      = {0,};
+        char        gfid_str[64] = {0,};
+        uint64_t    value     = 0;
+        int         ret       = 0;
+
+        /* work on a private copy: strtok_r() modifies its input */
+        strncpy (fmt, format, QB_XATTR_VAL_MAX);
+
+        s = strtok_r (fmt, ":", &save);
+        if (!s)
+                goto invalid;
+        formatstr = gf_strdup (s);
+        if (!formatstr)
+                return ENOMEM;
+
+        s = strtok_r (NULL, ":", &save);
+        if (!s)
+                goto invalid;
+        if (gf_string2bytesize (s, &size))
+                goto invalid;
+        if (!size)
+                goto invalid;
+
+        /* the empty delimiter set hands back the rest of the string */
+        s = strtok_r (NULL, "\0", &save);
+        if (s && !strncmp (s, "<gfid:", strlen ("<gfid:"))) {
+                /*
+                 * Check for valid gfid backing image specifier.
+                 */
+                if (strlen (s) + 1 > sizeof (gfid_str))
+                        goto invalid;
+                ret = sscanf (s, "<gfid:%[^>]s", gfid_str);
+                if (ret == 1) {
+                        ret = uuid_parse (gfid_str, gfid);
+                        if (ret < 0)
+                                goto invalid;
+                }
+        }
+
+        qb_inode = qb_inode_ctx_get (this, inode);
+        if (!qb_inode)
+                qb_inode = GF_CALLOC (1, sizeof (*qb_inode),
+                                      gf_qb_mt_qb_inode_t);
+        if (!qb_inode) {
+                GF_FREE (formatstr);
+                return ENOMEM;
+        }
+
+        strncpy (qb_inode->fmt, formatstr, QB_XATTR_VAL_MAX);
+        qb_inode->size = size;
+
+        /*
+         * If a backing gfid was not specified, interpret any remaining bytes
+         * associated with a backing image as a filename local to the parent
+         * directory. The format processing will validate further.
+         */
+        if (!uuid_is_null (gfid)) {
+                uuid_copy (qb_inode->backing_gfid, gfid);
+        } else if (s && (!qb_inode->backing_fname ||
+                         strcmp (qb_inode->backing_fname, s) != 0)) {
+                /* This function runs again on later lookups and readdirp
+                   replies for the same inode: keep an unchanged name and
+                   release a superseded one rather than leaking a new copy
+                   on every call. */
+                GF_FREE (qb_inode->backing_fname);
+                qb_inode->backing_fname = gf_strdup (s);
+        }
+
+        /* the context slot is a uint64_t; store the pointer value in one
+           (mirrors the cast used by __qb_inode_ctx_get) */
+        value = (uint64_t)(unsigned long) qb_inode;
+        inode_ctx_set (inode, this, &value);
+
+        GF_FREE (formatstr);
+
+        return 0;
+
+invalid:
+        GF_FREE (formatstr);
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "invalid format '%s' in inode %s", format,
+                uuid_utoa (inode->gfid));
+        return EINVAL;
+}
+
+
+/*
+ * Drop the inode and fd references held by a qb_local_t (when present)
+ * and free the structure.  A NULL local is accepted and ignored.
+ * this is currently unused.
+ */
+void
+qb_local_free (xlator_t *this, qb_local_t *local)
+{
+        if (!local)
+                return;
+
+        if (local->inode)
+                inode_unref (local->inode);
+        if (local->fd)
+                fd_unref (local->fd);
+        GF_FREE (local);
+}
+
+
+/*
+ * Allocate a zeroed qb_local_t, link it to the frame in both directions
+ * (local->frame and frame->local) and initialise its list head.
+ * Returns 0 on success and -1 if the allocation fails.
+ */
+int
+qb_local_init (call_frame_t *frame)
+{
+        qb_local_t *fresh = GF_CALLOC (1, sizeof (*fresh),
+                                       gf_qb_mt_qb_local_t);
+
+        if (fresh == NULL)
+                return -1;
+
+        INIT_LIST_HEAD (&fresh->list);
+        fresh->frame = frame;
+        frame->local = fresh;
+
+        return 0;
+}
+
+
+/*
+ * Lookup callback.  On success it caches the root inode, then inspects
+ * the reply's xdata for the block-format key: a present value is parsed
+ * into the inode context and the reported size is patched, a present but
+ * NULL value drops any existing context.  The reply is then unwound.
+ */
+int
+qb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+               dict_t *xdata, struct iatt *postparent)
+{
+        qb_conf_t *conf   = this->private;
+        char      *format = NULL;
+
+        if (op_ret == -1)
+                goto unwind;
+
+        /*
+         * Cache the root inode for dealing with backing images. The format
+         * coroutine and the gluster qemu backend driver both use the root inode
+         * table to verify and/or redirect I/O to the backing image via
+         * anonymous fd's.
+         */
+        if (!conf->root_inode && __is_root_gfid (inode->gfid))
+                conf->root_inode = inode_ref (inode);
+
+        /* nothing to do without xdata or without the format key in it */
+        if (!xdata || dict_get_str (xdata, conf->qb_xattr_key, &format) != 0)
+                goto unwind;
+
+        if (format) {
+                op_errno = qb_format_extract (this, format, inode);
+                if (op_errno)
+                        op_ret = -1;
+
+                qb_iatt_fixup (this, inode, buf);
+        } else {
+                qb_inode_cleanup (this, inode, 1);
+        }
+
+unwind:
+        QB_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+                         xdata, postparent);
+        return 0;
+}
+
+
+/*
+ * Lookup fop.  Adds the block-format key to the request xdata (creating
+ * a dict when the caller passed none) and winds to the child with
+ * qb_lookup_cbk.  Unwinds with ENOMEM if the dict cannot be prepared.
+ */
+int
+qb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        qb_conf_t *conf = this->private;
+
+        /* either way we own one reference on xdata from here on */
+        if (xdata)
+                xdata = dict_ref (xdata);
+        else
+                xdata = dict_new ();
+
+        if (xdata && dict_set_int32 (xdata, conf->qb_xattr_key, 0) == 0) {
+                STACK_WIND (frame, qb_lookup_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->lookup, loc, xdata);
+                dict_unref (xdata);
+                return 0;
+        }
+
+        QB_STACK_UNWIND (lookup, frame, -1, ENOMEM, 0, 0, 0, 0);
+        if (xdata)
+                dict_unref (xdata);
+        return 0;
+}
+
+
+/*
+ * Handle the "trusted.glusterfs.block-format" xattr of a (f)setxattr.
+ *
+ * Without that key the stub is simply resumed.  Otherwise the value is
+ * parsed into the inode context, the normalised "fmt:size" string is
+ * stored in the frame's qb_local_t and qb_format_and_resume is scheduled.
+ * Parse failures are answered by unwinding the stub.  Always returns 0.
+ */
+int
+qb_setxattr_format (call_frame_t *frame, xlator_t *this, call_stub_t *stub,
+                    dict_t *xattr, inode_t *inode)
+{
+        char       *format   = NULL;
+        int         op_errno = 0;
+        qb_local_t *qb_local = NULL;
+        data_t     *data     = NULL;
+        qb_inode_t *qb_inode = NULL;
+
+        data = dict_get (xattr, "trusted.glusterfs.block-format");
+        if (!data) {
+                QB_STUB_RESUME (stub);
+                return 0;
+        }
+
+        /* the dict value is not NUL terminated; make a terminated copy */
+        format = alloca (data->len + 1);
+        memcpy (format, data->data, data->len);
+        format[data->len] = 0;
+
+        op_errno = qb_format_extract (this, format, inode);
+        if (op_errno) {
+                QB_STUB_UNWIND (stub, -1, op_errno);
+                return 0;
+        }
+
+        qb_inode = qb_inode_ctx_get (this, inode);
+        if (!qb_inode) {
+                /* qb_format_extract() does not report a failed
+                   inode_ctx_set(), so the context may still be absent;
+                   fail the request rather than dereference NULL. */
+                QB_STUB_UNWIND (stub, -1, EINVAL);
+                return 0;
+        }
+
+        qb_local = frame->local;
+
+        qb_local->stub = stub;
+        qb_local->inode = inode_ref (inode);
+
+        /* size is a size_t; cast so the argument matches %lu everywhere */
+        snprintf (qb_local->fmt, QB_XATTR_VAL_MAX, "%s:%lu", qb_inode->fmt,
+                  (unsigned long) qb_inode->size);
+
+        qb_coroutine (frame, qb_format_and_resume);
+
+        return 0;
+}
+
+
+/*
+ * Handle the "trusted.glusterfs.block-snapshot-create" xattr.
+ *
+ * Without that key the stub is resumed.  Otherwise the xattr value is
+ * taken as the snapshot name (at most 128 bytes are copied), stored in
+ * the frame's qb_local_t together with the stub and an inode reference,
+ * and qb_snapshot_create is scheduled.  Always returns 0.
+ */
+int
+qb_setxattr_snapshot_create (call_frame_t *frame, xlator_t *this,
+                             call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+        qb_local_t *qb_local = frame->local;
+        data_t     *value    = NULL;
+        char       *snap     = NULL;
+
+        value = dict_get (xattr, "trusted.glusterfs.block-snapshot-create");
+        if (value == NULL) {
+                QB_STUB_RESUME (stub);
+                return 0;
+        }
+
+        /* NUL-terminated copy of the raw dict value */
+        snap = alloca (value->len + 1);
+        memcpy (snap, value->data, value->len);
+        snap[value->len] = 0;
+
+        strncpy (qb_local->name, snap, 128);
+        qb_local->inode = inode_ref (inode);
+        qb_local->stub = stub;
+
+        qb_coroutine (frame, qb_snapshot_create);
+
+        return 0;
+}
+
+
+/*
+ * Handle the "trusted.glusterfs.block-snapshot-delete" xattr.
+ *
+ * Without that key the stub is resumed.  Otherwise the xattr value is
+ * taken as the snapshot name (at most 128 bytes are copied), stored in
+ * the frame's qb_local_t together with the stub and an inode reference,
+ * and qb_snapshot_delete is scheduled.  Always returns 0.
+ */
+int
+qb_setxattr_snapshot_delete (call_frame_t *frame, xlator_t *this,
+                             call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+        qb_local_t *qb_local = frame->local;
+        data_t     *value    = NULL;
+        char       *snap     = NULL;
+
+        value = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete");
+        if (value == NULL) {
+                QB_STUB_RESUME (stub);
+                return 0;
+        }
+
+        /* NUL-terminated copy of the raw dict value */
+        snap = alloca (value->len + 1);
+        memcpy (snap, value->data, value->len);
+        snap[value->len] = 0;
+
+        strncpy (qb_local->name, snap, 128);
+        qb_local->inode = inode_ref (inode);
+        qb_local->stub = stub;
+
+        qb_coroutine (frame, qb_snapshot_delete);
+
+        return 0;
+}
+
+/*
+ * Handle the "trusted.glusterfs.block-snapshot-goto" xattr.
+ *
+ * Without that key the stub is resumed.  Otherwise the xattr value is
+ * taken as the snapshot name (at most 128 bytes are copied), stored in
+ * the frame's qb_local_t together with the stub and an inode reference,
+ * and qb_snapshot_goto is scheduled.  Always returns 0.
+ */
+int
+qb_setxattr_snapshot_goto (call_frame_t *frame, xlator_t *this,
+                           call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+        qb_local_t *qb_local = frame->local;
+        data_t     *value    = NULL;
+        char       *snap     = NULL;
+
+        value = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto");
+        if (value == NULL) {
+                QB_STUB_RESUME (stub);
+                return 0;
+        }
+
+        /* NUL-terminated copy of the raw dict value */
+        snap = alloca (value->len + 1);
+        memcpy (snap, value->data, value->len);
+        snap[value->len] = 0;
+
+        strncpy (qb_local->name, snap, 128);
+        qb_local->inode = inode_ref (inode);
+        qb_local->stub = stub;
+
+        qb_coroutine (frame, qb_snapshot_goto);
+
+        return 0;
+}
+
+
+/*
+ * Shared dispatcher for setxattr and fsetxattr.  The first of the
+ * block-* control keys found in xattr (checked in the order format,
+ * snapshot-create, snapshot-delete, snapshot-goto) selects the handler;
+ * when none is present the stub is resumed unchanged.  Always returns 0.
+ */
+int
+qb_setxattr_common (call_frame_t *frame, xlator_t *this, call_stub_t *stub,
+                    dict_t *xattr, inode_t *inode)
+{
+        if (dict_get (xattr, "trusted.glusterfs.block-format"))
+                qb_setxattr_format (frame, this, stub, xattr, inode);
+        else if (dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))
+                qb_setxattr_snapshot_create (frame, this, stub, xattr, inode);
+        else if (dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))
+                qb_setxattr_snapshot_delete (frame, this, stub, xattr, inode);
+        else if (dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))
+                qb_setxattr_snapshot_goto (frame, this, stub, xattr, inode);
+        else
+                QB_STUB_RESUME (stub);
+
+        return 0;
+}
+
+
+/*
+ * setxattr fop.  Sets up the frame's qb_local_t and a resume stub, then
+ * lets qb_setxattr_common() decide whether the request carries one of
+ * the block-* control keys.  Unwinds with ENOMEM if either allocation
+ * fails.
+ */
+int
+qb_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+             int flags, dict_t *xdata)
+{
+        call_stub_t *stub = NULL;
+
+        /* the stub is only attempted once the local exists */
+        if (qb_local_init (frame) == 0)
+                stub = fop_setxattr_stub (frame, default_setxattr_resume,
+                                          loc, xattr, flags, xdata);
+
+        if (stub == NULL) {
+                QB_STACK_UNWIND (setxattr, frame, -1, ENOMEM, 0);
+                return 0;
+        }
+
+        qb_setxattr_common (frame, this, stub, xattr, loc->inode);
+
+        return 0;
+}
+
+
+/*
+ * fsetxattr fop.  Sets up the frame's qb_local_t and a resume stub, then
+ * lets qb_setxattr_common() decide whether the request carries one of
+ * the block-* control keys.  Unwinds with ENOMEM if either allocation
+ * fails.
+ */
+int
+qb_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+              int flags, dict_t *xdata)
+{
+        call_stub_t *stub = NULL;
+
+        /* the stub is only attempted once the local exists */
+        if (qb_local_init (frame) == 0)
+                stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume,
+                                           fd, xattr, flags, xdata);
+
+        if (stub == NULL) {
+                QB_STACK_UNWIND (fsetxattr, frame, -1, ENOMEM, 0);
+                return 0;
+        }
+
+        qb_setxattr_common (frame, this, stub, xattr, fd->inode);
+
+        return 0;
+}
+
+
+/*
+ * Open callback for inodes that had a block-format context at open time.
+ * A successful reply on an inode that still has the context is wrapped in
+ * a cbk stub and handed to the qb_co_open coroutine; every other case is
+ * unwound right away (with ENOMEM if the stub cannot be created).
+ */
+int
+qb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+        qb_local_t  *qb_local = frame->local;
+        call_stub_t *stub     = NULL;
+
+        if (op_ret >= 0 && qb_inode_ctx_get (this, qb_local->inode)) {
+                stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno,
+                                          fd, xdata);
+                if (stub != NULL) {
+                        qb_local->stub = stub;
+                        qb_coroutine (frame, qb_co_open);
+                        return 0;
+                }
+
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        QB_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
+        return 0;
+}
+
+
+/*
+ * open fop.  Inodes without a block-format context pass straight through
+ * to the child.  Otherwise a qb_local_t holding references on the inode
+ * and fd is attached to the frame and the open is wound with qb_open_cbk.
+ * Unwinds with ENOMEM if the local cannot be allocated.
+ */
+int
+qb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+         fd_t *fd, dict_t *xdata)
+{
+        qb_local_t *qb_local = NULL;
+
+        if (qb_inode_ctx_get (this, loc->inode) == NULL) {
+                STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->open, loc, flags, fd,
+                            xdata);
+                return 0;
+        }
+
+        if (qb_local_init (frame) != 0) {
+                QB_STACK_UNWIND (open, frame, -1, ENOMEM, 0, 0);
+                return 0;
+        }
+
+        qb_local = frame->local;
+        qb_local->inode = inode_ref (loc->inode);
+        qb_local->fd = fd_ref (fd);
+
+        STACK_WIND (frame, qb_open_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+        return 0;
+}
+
+
+/*
+ * writev fop.  Inodes without a block-format context pass straight
+ * through to the child.  Otherwise the request is captured in a stub,
+ * stored in a qb_local_t together with inode and fd references, and
+ * handed to the qb_co_writev coroutine.  Unwinds with ENOMEM if the
+ * local or the stub cannot be allocated.
+ */
+int
+qb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+           int count, off_t offset, uint32_t flags, struct iobref *iobref,
+           dict_t *xdata)
+{
+        qb_local_t *qb_local = NULL;
+
+        if (qb_inode_ctx_get (this, fd->inode) == NULL) {
+                STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->writev, fd, vector, count,
+                            offset, flags, iobref, xdata);
+                return 0;
+        }
+
+        if (qb_local_init (frame) == 0) {
+                qb_local = frame->local;
+                qb_local->inode = inode_ref (fd->inode);
+                qb_local->fd = fd_ref (fd);
+
+                qb_local->stub = fop_writev_stub (frame, NULL, fd, vector,
+                                                  count, offset, flags,
+                                                  iobref, xdata);
+                if (qb_local->stub) {
+                        qb_coroutine (frame, qb_co_writev);
+                        return 0;
+                }
+        }
+
+        /* allocation failure; the unwind macro releases any local */
+        QB_STACK_UNWIND (writev, frame, -1, ENOMEM, 0, 0, 0);
+        return 0;
+}
+
+
+/*
+ * readv fop.  Inodes without a block-format context pass straight
+ * through to the child.  Otherwise the request is captured in a stub,
+ * stored in a qb_local_t together with inode and fd references, and
+ * handed to the qb_co_readv coroutine.  Unwinds with ENOMEM if the local
+ * or the stub cannot be allocated.
+ */
+int
+qb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+          off_t offset, uint32_t flags, dict_t *xdata)
+{
+        qb_local_t *qb_local = NULL;
+
+        if (qb_inode_ctx_get (this, fd->inode) == NULL) {
+                STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readv, fd, size, offset,
+                            flags, xdata);
+                return 0;
+        }
+
+        if (qb_local_init (frame) == 0) {
+                qb_local = frame->local;
+                qb_local->inode = inode_ref (fd->inode);
+                qb_local->fd = fd_ref (fd);
+
+                qb_local->stub = fop_readv_stub (frame, NULL, fd, size,
+                                                 offset, flags, xdata);
+                if (qb_local->stub) {
+                        qb_coroutine (frame, qb_co_readv);
+                        return 0;
+                }
+        }
+
+        /* allocation failure; the unwind macro releases any local */
+        QB_STACK_UNWIND (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
+        return 0;
+}
+
+
+/*
+ * fsync fop.  Inodes without a block-format context pass straight
+ * through to the child.  Otherwise the request is captured in a stub,
+ * stored in a qb_local_t together with inode and fd references, and
+ * handed to the qb_co_fsync coroutine.  Unwinds with ENOMEM if the local
+ * or the stub cannot be allocated.
+ */
+int
+qb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int dsync,
+          dict_t *xdata)
+{
+        qb_local_t *qb_local = NULL;
+
+        if (qb_inode_ctx_get (this, fd->inode) == NULL) {
+                STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsync, fd, dsync, xdata);
+                return 0;
+        }
+
+        if (qb_local_init (frame) == 0) {
+                qb_local = frame->local;
+                qb_local->inode = inode_ref (fd->inode);
+                qb_local->fd = fd_ref (fd);
+
+                qb_local->stub = fop_fsync_stub (frame, NULL, fd, dsync,
+                                                 xdata);
+                if (qb_local->stub) {
+                        qb_coroutine (frame, qb_co_fsync);
+                        return 0;
+                }
+        }
+
+        /* allocation failure; the unwind macro releases any local */
+        QB_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+        return 0;
+}
+
+
+/*
+ * flush fop.  Inodes without a block-format context pass straight
+ * through to the child.  Otherwise the request is captured in a flush
+ * stub and handed to the same coroutine that serves fsync (qb_co_fsync).
+ * Unwinds with ENOMEM if the local or the stub cannot be allocated.
+ */
+int
+qb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        qb_local_t *qb_local = NULL;
+
+        if (qb_inode_ctx_get (this, fd->inode) == NULL) {
+                STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->flush, fd, xdata);
+                return 0;
+        }
+
+        if (qb_local_init (frame) == 0) {
+                qb_local = frame->local;
+                qb_local->inode = inode_ref (fd->inode);
+                qb_local->fd = fd_ref (fd);
+
+                qb_local->stub = fop_flush_stub (frame, NULL, fd, xdata);
+                if (qb_local->stub) {
+                        qb_coroutine (frame, qb_co_fsync);
+                        return 0;
+                }
+        }
+
+        /* allocation failure; the unwind macro releases any local */
+        QB_STACK_UNWIND (flush, frame, -1, ENOMEM, 0);
+        return 0;
+}
+
+/*
+ * readdirp callback.  For every returned entry that carries both an
+ * inode and a dict, the block-format key is examined: a value is parsed
+ * into the inode context and the entry's stat size is patched, a present
+ * but NULL value drops any existing context.  Entries whose value fails
+ * to parse are left as they are.
+ *
+ * Error replies are forwarded untouched: entries may be NULL in that
+ * case (this translator itself unwinds readdirp errors with NULL
+ * entries), so the list must not be walked.
+ */
+static int32_t
+qb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                dict_t *xdata)
+{
+        qb_conf_t   *conf   = this->private;
+        gf_dirent_t *entry  = NULL;
+        char        *format = NULL;
+
+        if (op_ret < 0 || !entries)
+                goto unwind;
+
+        list_for_each_entry(entry, &entries->list, list) {
+                if (!entry->inode || !entry->dict)
+                        continue;
+
+                format = NULL;
+                if (dict_get_str(entry->dict, conf->qb_xattr_key, &format))
+                        continue;
+
+                if (!format) {
+                        qb_inode_cleanup(this, entry->inode, 1);
+                        continue;
+                }
+
+                if (qb_format_extract(this, format, entry->inode))
+                        continue;
+
+                qb_iatt_fixup(this, entry->inode, &entry->d_stat);
+        }
+
+unwind:
+        STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+        return 0;
+}
+
+/*
+ * readdirp fop.  Adds the block-format key to the request xdata
+ * (creating a dict when the caller passed none) and winds to the child
+ * with qb_readdirp_cbk.  Unwinds with ENOMEM if the dict cannot be
+ * prepared.
+ */
+static int32_t
+qb_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+            off_t off, dict_t *xdata)
+{
+        qb_conf_t *conf = this->private;
+
+        /* either way we own one reference on xdata from here on */
+        if (xdata)
+                xdata = dict_ref(xdata);
+        else
+                xdata = dict_new();
+
+        if (xdata && dict_set_int32 (xdata, conf->qb_xattr_key, 0) == 0) {
+                STACK_WIND(frame, qb_readdirp_cbk, FIRST_CHILD(this),
+                           FIRST_CHILD(this)->fops->readdirp, fd, size, off,
+                           xdata);
+
+                dict_unref(xdata);
+                return 0;
+        }
+
+        QB_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
+        if (xdata)
+                dict_unref(xdata);
+        return 0;
+}
+
+/*
+ * truncate fop.  Inodes without a block-format context pass straight
+ * through to the child.  Otherwise the request is captured in a stub,
+ * stored in a qb_local_t together with an inode reference and an
+ * anonymous fd for the inode, and handed to the qb_co_truncate
+ * coroutine.  Unwinds with ENOMEM if the local or the stub cannot be
+ * allocated.
+ */
+int
+qb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+             dict_t *xdata)
+{
+        qb_local_t *qb_local = NULL;
+
+        if (qb_inode_ctx_get (this, loc->inode) == NULL) {
+                STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->truncate, loc, offset,
+                            xdata);
+                return 0;
+        }
+
+        if (qb_local_init (frame) == 0) {
+                qb_local = frame->local;
+                qb_local->inode = inode_ref (loc->inode);
+                qb_local->fd = fd_anonymous (loc->inode);
+
+                qb_local->stub = fop_truncate_stub (frame, NULL, loc, offset,
+                                                    xdata);
+                if (qb_local->stub) {
+                        qb_coroutine (frame, qb_co_truncate);
+                        return 0;
+                }
+        }
+
+        /* allocation failure; the unwind macro releases any local */
+        QB_STACK_UNWIND (truncate, frame, -1, ENOMEM, 0, 0, 0);
+        return 0;
+}
+
+
+/*
+ * ftruncate fop.  Inodes without a block-format context pass straight
+ * through to the child.  Otherwise the request is captured in a stub,
+ * stored in a qb_local_t together with inode and fd references, and
+ * handed to the qb_co_truncate coroutine.  Unwinds with ENOMEM if the
+ * local or the stub cannot be allocated.
+ */
+int
+qb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              dict_t *xdata)
+{
+        qb_local_t *qb_local = NULL;
+
+        if (qb_inode_ctx_get (this, fd->inode) == NULL) {
+                STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->ftruncate, fd, offset,
+                            xdata);
+                return 0;
+        }
+
+        if (qb_local_init (frame) == 0) {
+                qb_local = frame->local;
+                qb_local->inode = inode_ref (fd->inode);
+                qb_local->fd = fd_ref (fd);
+
+                qb_local->stub = fop_ftruncate_stub (frame, NULL, fd, offset,
+                                                     xdata);
+                if (qb_local->stub) {
+                        qb_coroutine (frame, qb_co_truncate);
+                        return 0;
+                }
+        }
+
+        /* allocation failure; the unwind macro releases any local */
+        QB_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, 0, 0, 0);
+        return 0;
+}
+
+
+/*
+ * stat callback.  frame->local holds a referenced inode only when
+ * qb_stat() found a block-format context; in that case the reported size
+ * is patched and the reference dropped before unwinding.
+ */
+int
+qb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata)
+{
+        inode_t *held = frame->local;
+
+        /* detach first: QB_STACK_UNWIND treats frame->local as a
+           qb_local_t and would free it */
+        frame->local = NULL;
+
+        if (held != NULL) {
+                qb_iatt_fixup (this, held, iatt);
+                inode_unref (held);
+        }
+
+        QB_STACK_UNWIND (stat, frame, op_ret, op_errno, iatt, xdata);
+
+        return 0;
+}
+
+/*
+ * stat fop.  Remembers a reference to the inode in frame->local when it
+ * has a block-format context, then winds to the child with qb_stat_cbk.
+ */
+int
+qb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        inode_t *target = loc->inode;
+
+        if (qb_inode_ctx_get (this, target) != NULL)
+                frame->local = inode_ref (target);
+
+        STACK_WIND (frame, qb_stat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->stat, loc, xdata);
+        return 0;
+}
+
+
+/*
+ * fstat callback.  frame->local holds a referenced inode only when
+ * qb_fstat() found a block-format context; in that case the reported
+ * size is patched and the reference dropped before unwinding.
+ */
+int
+qb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata)
+{
+        inode_t *held = frame->local;
+
+        /* detach first: QB_STACK_UNWIND treats frame->local as a
+           qb_local_t and would free it */
+        frame->local = NULL;
+
+        if (held != NULL) {
+                qb_iatt_fixup (this, held, iatt);
+                inode_unref (held);
+        }
+
+        QB_STACK_UNWIND (fstat, frame, op_ret, op_errno, iatt, xdata);
+
+        return 0;
+}
+
+
+/*
+ * fstat fop.  Remembers a reference to the fd's inode in frame->local
+ * when it has a block-format context, then winds to the child with
+ * qb_fstat_cbk.
+ */
+int
+qb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        inode_t *target = fd->inode;
+
+        if (qb_inode_ctx_get (this, target) != NULL)
+                frame->local = inode_ref (target);
+
+        STACK_WIND (frame, qb_fstat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fstat, fd, xdata);
+        return 0;
+}
+
+
+/*
+ * setattr callback.  frame->local holds a referenced inode only when the
+ * fop found a block-format context; in that case the size in both the
+ * pre and post attributes is patched and the reference dropped before
+ * unwinding.
+ */
+int
+qb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+                dict_t *xdata)
+{
+        inode_t *held = frame->local;
+
+        /* detach first: QB_STACK_UNWIND treats frame->local as a
+           qb_local_t and would free it */
+        frame->local = NULL;
+
+        if (held != NULL) {
+                qb_iatt_fixup (this, held, pre);
+                qb_iatt_fixup (this, held, post);
+                inode_unref (held);
+        }
+
+        QB_STACK_UNWIND (setattr, frame, op_ret, op_errno, pre, post, xdata);
+
+        return 0;
+}
+
+
+/*
+ * setattr fop.  Remembers a reference to the inode in frame->local when
+ * it has a block-format context, then winds to the child with
+ * qb_setattr_cbk.
+ */
+int
+qb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+            int valid, dict_t *xdata)
+{
+        inode_t *target = loc->inode;
+
+        if (qb_inode_ctx_get (this, target) != NULL)
+                frame->local = inode_ref (target);
+
+        STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setattr, loc, buf, valid, xdata);
+        return 0;
+}
+
+
+/*
+ * fsetattr callback.  frame->local holds a referenced inode only when
+ * the fop found a block-format context; in that case the size in both
+ * the pre and post attributes is patched and the reference dropped
+ * before unwinding.
+ */
+int
+qb_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+                 dict_t *xdata)
+{
+        inode_t *held = frame->local;
+
+        /* detach first: QB_STACK_UNWIND treats frame->local as a
+           qb_local_t and would free it */
+        frame->local = NULL;
+
+        if (held != NULL) {
+                qb_iatt_fixup (this, held, pre);
+                qb_iatt_fixup (this, held, post);
+                inode_unref (held);
+        }
+
+        QB_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, pre, post, xdata);
+
+        return 0;
+}
+
+
+/*
+ * fsetattr fop.  Remembers a reference to the fd's inode in frame->local
+ * when it has a block-format context, then winds to the child.
+ *
+ * The reply is routed through qb_fsetattr_cbk so that it is unwound as an
+ * fsetattr (not setattr) reply, matching the fop that was wound.
+ */
+int
+qb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+             int valid, dict_t *xdata)
+{
+        if (qb_inode_ctx_get (this, fd->inode))
+                frame->local = inode_ref (fd->inode);
+
+        STACK_WIND (frame, qb_fsetattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsetattr, fd, buf, valid, xdata);
+        return 0;
+}
+
+
+/*
+ * forget callback: silently (warn == 0) drop the block-format context
+ * of the inode and pass on the result of qb_inode_cleanup().
+ */
+int
+qb_forget (xlator_t *this, inode_t *inode)
+{
+        int ret = qb_inode_cleanup (this, inode, 0);
+
+        return ret;
+}
+
+
+/*
+ * release callback.  Builds a fresh frame with a qb_local_t and schedules
+ * the qb_co_close coroutine on it.
+ *
+ * Returns 0 once the coroutine has been scheduled and -1 on every
+ * failure; each failure is logged and whatever was set up so far is torn
+ * down again.
+ */
+int
+qb_release (xlator_t *this, fd_t *fd)
+{
+        call_frame_t *frame = NULL;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Could not allocate frame. "
+                        "Leaking QEMU BlockDriverState");
+                return -1;
+        }
+
+        if (qb_local_init (frame) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Could not allocate local. "
+                        "Leaking QEMU BlockDriverState");
+                STACK_DESTROY (frame->root);
+                return -1;
+        }
+
+        /* NOTE(review): the local handed to qb_co_close has neither its
+           inode nor its fd field set here, and fd is otherwise unused --
+           confirm that qb_co_close does not rely on them. */
+        if (qb_coroutine (frame, qb_co_close) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Could not allocate coroutine. "
+                        "Leaking QEMU BlockDriverState");
+                qb_local_free (this, frame->local);
+                frame->local = NULL;
+                STACK_DESTROY (frame->root);
+                /* report the failure like the other error paths do */
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * Register this translator's memory-accounting types
+ * (gf_qb_mt_end + 1 slots).  Logs on failure and returns the result of
+ * xlator_mem_acct_init() unchanged.
+ */
+int
+mem_acct_init (xlator_t *this)
+{
+        int ret = xlator_mem_acct_init (this, gf_qb_mt_end + 1);
+
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Memory accounting init "
+                        "failed");
+        }
+
+        return ret;
+}
+
+
+/* Option reconfiguration hook: nothing is re-read, the options argument
+   is ignored and 0 is returned. */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ return 0;
+}
+
+
+/*
+ * Translator init.  Requires exactly one child, allocates the qb_conf_t,
+ * reads the "default-password" option, creates the single-threaded
+ * syncenv used for the qemu coroutines, builds the per-volume xattr key
+ * and initialises the qemu block layer once per process.
+ *
+ * Returns 0 on success and -1 on failure; on failure the conf is freed
+ * and this->private is left unset.
+ */
+int
+init (xlator_t *this)
+{
+        qb_conf_t *conf = NULL;
+        int32_t ret = -1;
+        static int bdrv_inited = 0;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: qemu-block (%s) not configured with exactly "
+                        "one child", this->name);
+                goto out;
+        }
+
+        conf = GF_CALLOC (1, sizeof (*conf), gf_qb_mt_qb_conf_t);
+        if (!conf)
+                goto out;
+
+        /* configure 'option default-password <password>' */
+        GF_OPTION_INIT ("default-password", conf->default_password, str, out);
+
+        /* qemu coroutines use "co_mutex" for synchronizing among themselves.
+           However "co_mutex" itself is not threadsafe if the coroutine framework
+           is multithreaded (which usually is not). However synctasks are
+           fundamentally multithreaded, so for now create a syncenv which has
+           scaling limits set to max 1 thread so that the qemu coroutines can
+           execute "safely".
+
+           Future work: provide an implementation of "co_mutex" which is
+           threadsafe and use the global multithreaded ctx->env syncenv.
+        */
+        conf->env = syncenv_new (0, 1, 1);
+        if (!conf->env) {
+                /* without a syncenv no coroutine can ever be scheduled */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Could not create syncenv");
+                goto out;
+        }
+
+        this->private = conf;
+
+        ret = 0;
+
+        snprintf (conf->qb_xattr_key, QB_XATTR_KEY_MAX, QB_XATTR_KEY_FMT,
+                  this->name);
+
+        cur_mon = (void *) 1;
+
+        /* bdrv_init() is guarded so it runs only once per process */
+        if (!bdrv_inited) {
+                bdrv_init ();
+                bdrv_inited = 1;
+        }
+
+out:
+        if (ret)
+                GF_FREE (conf);
+
+        return ret;
+}
+
+
+/*
+ * Translator teardown: detach the conf from the xlator, drop the cached
+ * root inode reference (if any) and free the conf.  Safe to call when
+ * init() never installed a conf.
+ */
+void
+fini (xlator_t *this)
+{
+        qb_conf_t *conf = NULL;
+
+        conf = this->private;
+        if (!conf)
+                return;
+
+        this->private = NULL;
+
+        if (conf->root_inode)
+                inode_unref(conf->root_inode);
+        GF_FREE (conf);
+
+        return;
+}
+
+
+/* File operations implemented by this translator; fops not listed here
+   are not overridden.  getxattr/fgetxattr are present only as
+   commented-out placeholders. */
+struct xlator_fops fops = {
+ .lookup = qb_lookup,
+ .fsetxattr = qb_fsetxattr,
+ .setxattr = qb_setxattr,
+ .open = qb_open,
+ .writev = qb_writev,
+ .readv = qb_readv,
+ .fsync = qb_fsync,
+ .truncate = qb_truncate,
+ .ftruncate = qb_ftruncate,
+ .stat = qb_stat,
+ .fstat = qb_fstat,
+ .setattr = qb_setattr,
+ .fsetattr = qb_fsetattr,
+ .flush = qb_flush,
+/*
+ .getxattr = qb_getxattr,
+ .fgetxattr = qb_fgetxattr
+*/
+ .readdirp = qb_readdirp,
+};
+
+
+/* Lifecycle callbacks: forget drops the per-inode context, release
+   schedules the close coroutine for an fd. */
+struct xlator_cbks cbks = {
+ .forget = qb_forget,
+ .release = qb_release,
+};
+
+
+/* No state-dump callbacks are provided. */
+struct xlator_dumpops dumpops = {
+};
+
+
+/* Volume options accepted by this translator; the table ends with a
+   NULL-key entry.  "default-password" is read in init(). */
+struct volume_options options[] = {
+ { .key = {"default-password"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "Default password for the AES encrypted block images."
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/qemu-block/src/qemu-block.h b/xlators/features/qemu-block/src/qemu-block.h
new file mode 100644
index 000000000..c95f2799a
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block.h
@@ -0,0 +1,109 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QEMU_BLOCK_H
+#define __QEMU_BLOCK_H
+
+#include "syncop.h"
+#include "call-stub.h"
+#include "block/block_int.h"
+#include "monitor/monitor.h"
+
+/* QB_XATTR_KEY_FMT is the on-disk xattr stored in the inode which
+ indicates that the file must be "interpreted" by the block format
+ logic. The value of the key is of the pattern:
+
+ "format:virtual_size[:backing_image]"
+
+ e.g.
+
+ "qcow2:20GB" or "qed:100GB"
+
+ The fields are colon separated. The format is a case sensitive
+ string which qemu recognizes. virtual_size is specified as a size
+ which glusterfs recognizes as size (i.e., a value accepted by
+ gf_string2bytesize()). The optional backing_image is either
+ "<gfid:UUID>" or a filename (see qb_format_extract()).
+*/
+#define QB_XATTR_KEY_FMT "trusted.glusterfs.%s.format"
+
+#define QB_XATTR_KEY_MAX 64
+
+#define QB_XATTR_VAL_MAX 64
+
+
+/* Per-inode context of a block-formatted file; created and filled by
+   qb_format_extract() and stored in the inode ctx of this translator. */
+typedef struct qb_inode {
+ char fmt[QB_XATTR_VAL_MAX]; /* this is only the format, not "format:size" */
+ size_t size; /* virtual size in bytes */
+ BlockDriverState *bs; /* qemu block state; created on demand by the coroutines */
+ int refcnt; /* NOTE(review): not touched in qemu-block.c -- presumably counts users of bs; confirm */
+ uuid_t backing_gfid; /* gfid of the backing image when given as "<gfid:...>" */
+ char *backing_fname; /* gf_strdup()'d backing image filename, used when no gfid was given */
+} qb_inode_t;
+
+
+/* Per-translator private state (this->private), set up in init(). */
+typedef struct qb_conf {
+ Monitor *mon; /* NOTE(review): not referenced in qemu-block.c -- confirm use */
+ struct syncenv *env; /* syncenv created with a 1..1 thread range for the coroutines */
+ char qb_xattr_key[QB_XATTR_KEY_MAX]; /* QB_XATTR_KEY_FMT expanded with the xlator name */
+ char *default_password; /* value of the "default-password" option */
+ inode_t *root_inode; /* referenced root inode, cached by the lookup callback */
+} qb_conf_t;
+
+
+/* Per-request state kept in frame->local and passed to the coroutines;
+   released with qb_local_free(). */
+typedef struct qb_local {
+ call_frame_t *frame; /* backpointer */
+ call_stub_t *stub; /* stub that is resumed or unwound to answer the request */
+ inode_t *inode; /* referenced; dropped by qb_local_free() */
+ fd_t *fd; /* referenced; dropped by qb_local_free() */
+ char fmt[QB_XATTR_VAL_MAX+1]; /* "format:size" string built for a format request */
+ char name[256]; /* snapshot name for the snapshot requests */
+ synctask_fn_t synctask_fn; /* NOTE(review): presumably the function given to qb_coroutine(); confirm */
+ struct list_head list; /* initialised by qb_local_init(); users are outside this file */
+} qb_local_t;
+
+/* Helpers shared between qemu-block.c and the coroutine/backend code. */
+void qb_local_free (xlator_t *this, qb_local_t *local);
+/* Schedule fn for the request held in frame->local; non-zero on failure. */
+int qb_coroutine (call_frame_t *frame, synctask_fn_t fn);
+inode_t *qb_inode_from_filename (const char *filename);
+int qb_inode_to_filename (inode_t *inode, char *filename, int size);
+/* Parse a format xattr value into the inode ctx; 0, EINVAL or ENOMEM. */
+int qb_format_extract (xlator_t *this, char *format, inode_t *inode);
+
+qb_inode_t *qb_inode_ctx_get (xlator_t *this, inode_t *inode);
+
+/* Unwind a frame and then free its qb_local_t, if any.  frame->local is
+   detached before the unwind and must be either NULL or a qb_local_t.
+   Note that frame is expanded more than once. */
+#define QB_STACK_UNWIND(typ, frame, args ...) do { \
+ qb_local_t *__local = frame->local; \
+ xlator_t *__this = frame->this; \
+ \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (typ, frame, args); \
+ if (__local) \
+ qb_local_free (__this, __local); \
+ } while (0)
+
+/* Answer the request held in stub with (op_ret, op_errno) through
+   call_unwind_error() and then free the qb_local_t of the stub's frame,
+   if any.  The local is detached from the frame before the unwind. */
+#define QB_STUB_UNWIND(stub, op_ret, op_errno) do { \
+ qb_local_t *__local = stub->frame->local; \
+ xlator_t *__this = stub->frame->this; \
+ \
+ stub->frame->local = NULL; \
+ call_unwind_error (stub, op_ret, op_errno); \
+ if (__local) \
+ qb_local_free (__this, __local); \
+ } while (0)
+
+/* Resume the request held in stub through call_resume() and then free
+   the qb_local_t of the stub's frame, if any.  The local is detached from
+   the frame before the resume.
+
+   The macro parameter is the stub itself and is used as such throughout
+   the body, so the macro no longer depends on the caller having a
+   variable that happens to be called "stub". */
+#define QB_STUB_RESUME(stub) do {                                       \
+        qb_local_t *__local = (stub)->frame->local;                     \
+        xlator_t   *__this  = (stub)->frame->this;                      \
+                                                                        \
+        (stub)->frame->local = NULL;                                    \
+        call_resume (stub);                                             \
+        if (__local)                                                    \
+                qb_local_free (__this, __local);                        \
+        } while (0)
+
+#endif /* !__QEMU_BLOCK_H */
diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am
index e8ab4cb24..15e46629e 100644
--- a/xlators/features/quiesce/src/Makefile.am
+++ b/xlators/features/quiesce/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = quiesce.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-quiesce_la_LDFLAGS = -module -avoidversion
+quiesce_la_LDFLAGS = -module -avoid-version
quiesce_la_SOURCES = quiesce.c
quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = quiesce.h quiesce-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h
index 00f7aa4f1..6e582f424 100644
--- a/xlators/features/quiesce/src/quiesce-mem-types.h
+++ b/xlators/features/quiesce/src/quiesce-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUIESCE_MEM_TYPES_H__
#define __QUIESCE_MEM_TYPES_H__
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
index 6c76a0bf5..24c7dc6ed 100644
--- a/xlators/features/quiesce/src/quiesce.c
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -34,29 +24,22 @@
void
gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local)
{
- quiesce_priv_t *priv = NULL;
-
if (!local || !this || !this->private)
return;
- priv = this->private;
-
if (local->loc.inode)
loc_wipe (&local->loc);
if (local->fd)
fd_unref (local->fd);
- if (local->name)
- GF_FREE (local->name);
- if (local->volname)
- GF_FREE (local->volname);
+ GF_FREE (local->name);
+ GF_FREE (local->volname);
if (local->dict)
dict_unref (local->dict);
if (local->iobref)
iobref_unref (local->iobref);
- if (local->vector)
- GF_FREE (local->vector);
+ GF_FREE (local->vector);
- mem_put (priv->local_pool, local);
+ mem_put (local);
}
call_stub_t *
@@ -108,7 +91,6 @@ gf_quiesce_timeout (void *data)
{
xlator_t *this = NULL;
quiesce_priv_t *priv = NULL;
- int need_dequeue = 0;
this = data;
priv = this->private;
@@ -117,7 +99,6 @@ gf_quiesce_timeout (void *data)
LOCK (&priv->lock);
{
priv->pass_through = _gf_true;
- need_dequeue = (priv->queue_size)? 1:0;
}
UNLOCK (&priv->lock);
@@ -130,7 +111,7 @@ void
gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
{
quiesce_priv_t *priv = NULL;
- struct timeval timeout = {0,};
+ struct timespec timeout = {0,};
priv = this->private;
if (!priv) {
@@ -148,7 +129,7 @@ gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
if (!priv->timer) {
timeout.tv_sec = 20;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
priv->timer = gf_timer_call_after (this->ctx,
timeout,
@@ -168,12 +149,9 @@ quiesce_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *dict, struct iatt *postparent)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
@@ -200,23 +178,21 @@ out:
int32_t
quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_stat_stub (frame, default_stat_resume,
- &local->loc);
+ &local->loc, xdata);
if (!stub) {
STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -224,7 +200,7 @@ quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -233,22 +209,19 @@ out:
int32_t
quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_access_stub (frame, default_access_resume,
- &local->loc, local->flag);
+ &local->loc, local->flag, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (access, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -256,7 +229,7 @@ quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -266,23 +239,20 @@ out:
int32_t
quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_readlink_stub (frame, default_readlink_resume,
- &local->loc, local->size);
+ &local->loc, local->size, xdata);
if (!stub) {
STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -290,7 +260,7 @@ quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -299,24 +269,21 @@ out:
int32_t
quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_open_stub (frame, default_open_resume,
&local->loc, local->flag, local->fd,
- local->wbflags);
+ xdata);
if (!stub) {
STACK_UNWIND_STRICT (open, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -324,7 +291,7 @@ quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -334,23 +301,21 @@ out:
int32_t
quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_readv_stub (frame, default_readv_resume,
- local->fd, local->size, local->offset);
+ local->fd, local->size, local->offset,
+ local->io_flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
+ NULL, 0, NULL, NULL, NULL);
goto out;
}
@@ -359,7 +324,7 @@ quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
+ stbuf, iobref, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -368,22 +333,19 @@ out:
int32_t
quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_flush_stub (frame, default_flush_resume,
- local->fd);
+ local->fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -391,7 +353,7 @@ quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -403,23 +365,20 @@ out:
int32_t
quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fsync_stub (frame, default_fsync_resume,
- local->fd, local->flag);
+ local->fd, local->flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -427,7 +386,7 @@ quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -436,23 +395,20 @@ out:
int32_t
quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fstat_stub (frame, default_fstat_resume,
- local->fd);
+ local->fd, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -460,7 +416,7 @@ quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -469,23 +425,20 @@ out:
int32_t
quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_opendir_stub (frame, default_opendir_resume,
- &local->loc, local->fd);
+ &local->loc, local->fd, xdata);
if (!stub) {
STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -493,7 +446,7 @@ quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -502,22 +455,19 @@ out:
int32_t
quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume,
- local->fd, local->flag);
+ local->fd, local->flag, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -525,7 +475,7 @@ quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -534,23 +484,20 @@ out:
int32_t
quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_statfs_stub (frame, default_statfs_resume,
- &local->loc);
+ &local->loc, xdata);
if (!stub) {
STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -558,7 +505,7 @@ quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -567,23 +514,20 @@ out:
int32_t
quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume,
- local->fd, local->name);
+ local->fd, local->name, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -591,7 +535,7 @@ quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -601,23 +545,20 @@ out:
int32_t
quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_getxattr_stub (frame, default_getxattr_resume,
- &local->loc, local->name);
+ &local->loc, local->name, xdata);
if (!stub) {
STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -625,7 +566,7 @@ quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -636,23 +577,20 @@ out:
int32_t
quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
- uint8_t *strong_checksum)
+ uint8_t *strong_checksum, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
- local->fd, local->offset, local->flag);
+ local->fd, local->offset, local->flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM,
- 0, NULL);
+ 0, NULL, NULL);
goto out;
}
@@ -661,7 +599,7 @@ quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum);
+ strong_checksum, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -671,23 +609,20 @@ out:
int32_t
quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_readdir_stub (frame, default_readdir_resume,
- local->fd, local->size, local->offset);
+ local->fd, local->size, local->offset, xdata);
if (!stub) {
STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -695,7 +630,7 @@ quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -705,23 +640,21 @@ out:
int32_t
quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
- priv = this->private;
-
local = frame->local;
frame->local = NULL;
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_readdirp_stub (frame, default_readdirp_resume,
- local->fd, local->size, local->offset);
+ local->fd, local->size, local->offset,
+ local->dict);
if (!stub) {
STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -729,7 +662,7 @@ quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -742,7 +675,7 @@ out:
int32_t
quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -756,10 +689,11 @@ quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_writev_stub (frame, default_writev_resume,
local->fd, local->vector, local->flag,
- local->offset, local->iobref);
+ local->offset, local->io_flags,
+ local->iobref, xdata);
if (!stub) {
STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -767,7 +701,7 @@ quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -776,7 +710,7 @@ out:
int32_t
quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -790,10 +724,10 @@ quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_xattrop_stub (frame, default_xattrop_resume,
&local->loc, local->xattrop_flags,
- local->dict);
+ local->dict, xdata);
if (!stub) {
STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -801,7 +735,7 @@ quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -810,7 +744,7 @@ out:
int32_t
quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -824,10 +758,10 @@ quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_fxattrop_stub (frame, default_fxattrop_resume,
local->fd, local->xattrop_flags,
- local->dict);
+ local->dict, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -835,7 +769,7 @@ quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -844,7 +778,7 @@ out:
int32_t
quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -857,10 +791,10 @@ quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_lk_stub (frame, default_lk_resume,
- local->fd, local->flag, &local->flock);
+ local->fd, local->flag, &local->flock, xdata);
if (!stub) {
STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -868,7 +802,7 @@ quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -877,7 +811,7 @@ out:
int32_t
quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -891,9 +825,9 @@ quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_inodelk_stub (frame, default_inodelk_resume,
local->volname, &local->loc,
- local->flag, &local->flock);
+ local->flag, &local->flock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -901,7 +835,7 @@ quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -911,7 +845,7 @@ out:
int32_t
quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -925,9 +859,9 @@ quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_finodelk_stub (frame, default_finodelk_resume,
local->volname, local->fd,
- local->flag, &local->flock);
+ local->flag, &local->flock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -935,7 +869,7 @@ quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -944,7 +878,7 @@ out:
int32_t
quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -958,9 +892,9 @@ quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_entrylk_stub (frame, default_entrylk_resume,
local->volname, &local->loc,
- local->name, local->cmd, local->type);
+ local->name, local->cmd, local->type, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -968,7 +902,7 @@ quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -977,7 +911,7 @@ out:
int32_t
quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -991,9 +925,9 @@ quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
local->volname, local->fd,
- local->name, local->cmd, local->type);
+ local->name, local->cmd, local->type, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -1001,7 +935,7 @@ quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -1011,7 +945,7 @@ out:
int32_t
quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1024,10 +958,10 @@ quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_setattr_stub (frame, default_setattr_resume,
- &local->loc, &local->stbuf, local->flag);
+ &local->loc, &local->stbuf, local->flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -1036,7 +970,7 @@ quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -1046,7 +980,7 @@ out:
int32_t
quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1060,10 +994,10 @@ quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fsetattr_stub (frame, default_fsetattr_resume,
- local->fd, &local->stbuf, local->flag);
+ local->fd, &local->stbuf, local->flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -1072,7 +1006,7 @@ quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -1090,7 +1024,7 @@ int32_t
quiesce_removexattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1103,14 +1037,14 @@ quiesce_removexattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
loc,
- name);
- return 0;
+ name, xdata);
+ return 0;
}
stub = fop_removexattr_stub (frame, default_removexattr_resume,
- loc, name);
+ loc, name, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1123,7 +1057,7 @@ int32_t
quiesce_truncate (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1136,13 +1070,13 @@ quiesce_truncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
loc,
- offset);
- return 0;
+ offset, xdata);
+ return 0;
}
- stub = fop_truncate_stub (frame, default_truncate_resume, loc, offset);
+ stub = fop_truncate_stub (frame, default_truncate_resume, loc, offset, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1156,7 +1090,7 @@ quiesce_fsetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1170,14 +1104,14 @@ quiesce_fsetxattr (call_frame_t *frame,
FIRST_CHILD(this)->fops->fsetxattr,
fd,
dict,
- flags);
- return 0;
+ flags, xdata);
+ return 0;
}
stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume,
- fd, dict, flags);
+ fd, dict, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1191,7 +1125,7 @@ quiesce_setxattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1205,14 +1139,14 @@ quiesce_setxattr (call_frame_t *frame,
FIRST_CHILD(this)->fops->setxattr,
loc,
dict,
- flags);
- return 0;
+ flags, xdata);
+ return 0;
}
stub = fop_setxattr_stub (frame, default_setxattr_resume,
- loc, dict, flags);
+ loc, dict, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1224,7 +1158,7 @@ quiesce_setxattr (call_frame_t *frame,
int32_t
quiesce_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1237,15 +1171,15 @@ quiesce_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, (flags & ~O_APPEND), mode, fd, params);
- return 0;
+ loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
+ return 0;
}
stub = fop_create_stub (frame, default_create_resume,
- loc, (flags & ~O_APPEND), mode, fd, params);
+ loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
if (!stub) {
STACK_UNWIND_STRICT (create, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1258,7 +1192,7 @@ int32_t
quiesce_link (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1270,14 +1204,14 @@ quiesce_link (call_frame_t *frame,
default_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
- stub = fop_link_stub (frame, default_link_resume, oldloc, newloc);
+ stub = fop_link_stub (frame, default_link_resume, oldloc, newloc, xdata);
if (!stub) {
STACK_UNWIND_STRICT (link, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1290,7 +1224,7 @@ int32_t
quiesce_rename (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1302,14 +1236,14 @@ quiesce_rename (call_frame_t *frame,
default_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
- stub = fop_rename_stub (frame, default_rename_resume, oldloc, newloc);
+ stub = fop_rename_stub (frame, default_rename_resume, oldloc, newloc, xdata);
if (!stub) {
STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1321,7 +1255,7 @@ quiesce_rename (call_frame_t *frame,
int
quiesce_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1332,15 +1266,15 @@ quiesce_symlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
- return 0;
+ linkpath, loc, umask, xdata);
+ return 0;
}
stub = fop_symlink_stub (frame, default_symlink_resume,
- linkpath, loc, params);
+ linkpath, loc, umask, xdata);
if (!stub) {
STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1351,7 +1285,7 @@ quiesce_symlink (call_frame_t *frame, xlator_t *this,
int
-quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1362,13 +1296,13 @@ quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
STACK_WIND (frame, default_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
- return 0;
+ loc, flags, xdata);
+ return 0;
}
- stub = fop_rmdir_stub (frame, default_rmdir_resume, loc, flags);
+ stub = fop_rmdir_stub (frame, default_rmdir_resume, loc, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1380,7 +1314,7 @@ quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
int32_t
quiesce_unlink (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1392,13 +1326,13 @@ quiesce_unlink (call_frame_t *frame,
default_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ loc, xflag, xdata);
+ return 0;
}
- stub = fop_unlink_stub (frame, default_unlink_resume, loc);
+ stub = fop_unlink_stub (frame, default_unlink_resume, loc, xflag, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1409,7 +1343,7 @@ quiesce_unlink (call_frame_t *frame,
int
quiesce_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1420,15 +1354,15 @@ quiesce_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
- return 0;
+ loc, mode, umask, xdata);
+ return 0;
}
stub = fop_mkdir_stub (frame, default_mkdir_resume,
- loc, mode, params);
+ loc, mode, umask, xdata);
if (!stub) {
STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1440,7 +1374,7 @@ quiesce_mkdir (call_frame_t *frame, xlator_t *this,
int
quiesce_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *parms)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1451,15 +1385,15 @@ quiesce_mknod (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, parms);
- return 0;
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
stub = fop_mknod_stub (frame, default_mknod_resume,
- loc, mode, rdev, parms);
+ loc, mode, rdev, umask, xdata);
if (!stub) {
STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1472,7 +1406,7 @@ int32_t
quiesce_ftruncate (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1485,13 +1419,13 @@ quiesce_ftruncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
fd,
- offset);
- return 0;
+ offset, xdata);
+ return 0;
}
- stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset);
+ stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1506,7 +1440,7 @@ int32_t
quiesce_readlink (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- size_t size)
+ size_t size, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1525,13 +1459,13 @@ quiesce_readlink (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
loc,
- size);
- return 0;
+ size, xdata);
+ return 0;
}
- stub = fop_readlink_stub (frame, default_readlink_resume, loc, size);
+ stub = fop_readlink_stub (frame, default_readlink_resume, loc, size, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1545,7 +1479,7 @@ int32_t
quiesce_access (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t mask)
+ int32_t mask, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1564,13 +1498,13 @@ quiesce_access (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
loc,
- mask);
- return 0;
+ mask, xdata);
+ return 0;
}
- stub = fop_access_stub (frame, default_access_resume, loc, mask);
+ stub = fop_access_stub (frame, default_access_resume, loc, mask, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (access, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1583,7 +1517,7 @@ int32_t
quiesce_fgetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1604,13 +1538,13 @@ quiesce_fgetxattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr,
fd,
- name);
+ name, xdata);
return 0;
}
- stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name);
+ stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1622,7 +1556,7 @@ quiesce_fgetxattr (call_frame_t *frame,
int32_t
quiesce_statfs (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1639,13 +1573,13 @@ quiesce_statfs (call_frame_t *frame,
quiesce_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
- stub = fop_statfs_stub (frame, default_statfs_resume, loc);
+ stub = fop_statfs_stub (frame, default_statfs_resume, loc, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1658,7 +1592,7 @@ int32_t
quiesce_fsyncdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1677,13 +1611,13 @@ quiesce_fsyncdir (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
fd,
- flags);
- return 0;
+ flags, xdata);
+ return 0;
}
- stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags);
+ stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1695,7 +1629,7 @@ quiesce_fsyncdir (call_frame_t *frame,
int32_t
quiesce_opendir (call_frame_t *frame,
xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1713,13 +1647,13 @@ quiesce_opendir (call_frame_t *frame,
quiesce_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ loc, fd, xdata);
+ return 0;
}
- stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd);
+ stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1731,7 +1665,7 @@ quiesce_opendir (call_frame_t *frame,
int32_t
quiesce_fstat (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1748,13 +1682,13 @@ quiesce_fstat (call_frame_t *frame,
quiesce_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
- stub = fop_fstat_stub (frame, default_fstat_resume, fd);
+ stub = fop_fstat_stub (frame, default_fstat_resume, fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1767,7 +1701,7 @@ int32_t
quiesce_fsync (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1786,13 +1720,13 @@ quiesce_fsync (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
fd,
- flags);
- return 0;
+ flags, xdata);
+ return 0;
}
- stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags);
+ stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1804,7 +1738,7 @@ quiesce_fsync (call_frame_t *frame,
int32_t
quiesce_flush (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1821,13 +1755,13 @@ quiesce_flush (call_frame_t *frame,
quiesce_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
- stub = fop_flush_stub (frame, default_flush_resume, fd);
+ stub = fop_flush_stub (frame, default_flush_resume, fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1842,8 +1776,8 @@ quiesce_writev (call_frame_t *frame,
fd_t *fd,
struct iovec *vector,
int32_t count,
- off_t off,
- struct iobref *iobref)
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1858,15 +1792,15 @@ quiesce_writev (call_frame_t *frame,
fd,
vector,
count,
- off,
- iobref);
- return 0;
+ off, flags,
+ iobref, xdata);
+ return 0;
}
stub = fop_writev_stub (frame, default_writev_resume,
- fd, vector, count, off, iobref);
+ fd, vector, count, off, flags, iobref, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1880,7 +1814,7 @@ quiesce_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1893,6 +1827,7 @@ quiesce_readv (call_frame_t *frame,
local->fd = fd_ref (fd);
local->size = size;
local->offset = offset;
+ local->io_flag = flags;
frame->local = local;
STACK_WIND (frame,
@@ -1901,14 +1836,15 @@ quiesce_readv (call_frame_t *frame,
FIRST_CHILD(this)->fops->readv,
fd,
size,
- offset);
- return 0;
+ offset, flags, xdata);
+ return 0;
}
- stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset);
+ stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset,
+ flags, xdata);
if (!stub) {
STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
+ NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -1923,7 +1859,7 @@ quiesce_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
int32_t flags, fd_t *fd,
- int32_t wbflags)
+ dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1939,21 +1875,20 @@ quiesce_open (call_frame_t *frame,
/* Don't send O_APPEND below, as write() re-transmittions can
fail with O_APPEND */
local->flag = (flags & ~O_APPEND);
- local->wbflags = wbflags;
frame->local = local;
STACK_WIND (frame,
quiesce_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, (flags & ~O_APPEND), fd, wbflags);
- return 0;
+ loc, (flags & ~O_APPEND), fd, xdata);
+ return 0;
}
stub = fop_open_stub (frame, default_open_resume, loc,
- (flags & ~O_APPEND), fd, wbflags);
+ (flags & ~O_APPEND), fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1966,7 +1901,7 @@ int32_t
quiesce_getxattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1987,13 +1922,13 @@ quiesce_getxattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
loc,
- name);
- return 0;
+ name, xdata);
+ return 0;
}
- stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name);
+ stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2008,7 +1943,7 @@ quiesce_xattrop (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2022,14 +1957,14 @@ quiesce_xattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->xattrop,
loc,
flags,
- dict);
- return 0;
+ dict, xdata);
+ return 0;
}
stub = fop_xattrop_stub (frame, default_xattrop_resume,
- loc, flags, dict);
+ loc, flags, dict, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2043,7 +1978,7 @@ quiesce_fxattrop (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2057,14 +1992,14 @@ quiesce_fxattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->fxattrop,
fd,
flags,
- dict);
- return 0;
+ dict, xdata);
+ return 0;
}
stub = fop_fxattrop_stub (frame, default_fxattrop_resume,
- fd, flags, dict);
+ fd, flags, dict, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2078,7 +2013,7 @@ quiesce_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2092,13 +2027,13 @@ quiesce_lk (call_frame_t *frame,
FIRST_CHILD(this)->fops->lk,
fd,
cmd,
- lock);
- return 0;
+ lock, xdata);
+ return 0;
}
- stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, lock);
+ stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, lock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2111,7 +2046,7 @@ quiesce_lk (call_frame_t *frame,
int32_t
quiesce_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2123,14 +2058,14 @@ quiesce_inodelk (call_frame_t *frame, xlator_t *this,
default_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
stub = fop_inodelk_stub (frame, default_inodelk_resume,
- volume, loc, cmd, lock);
+ volume, loc, cmd, lock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2141,7 +2076,7 @@ quiesce_inodelk (call_frame_t *frame, xlator_t *this,
int32_t
quiesce_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2153,14 +2088,14 @@ quiesce_finodelk (call_frame_t *frame, xlator_t *this,
default_finodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
stub = fop_finodelk_stub (frame, default_finodelk_resume,
- volume, fd, cmd, lock);
+ volume, fd, cmd, lock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2172,7 +2107,7 @@ quiesce_finodelk (call_frame_t *frame, xlator_t *this,
int32_t
quiesce_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2183,14 +2118,14 @@ quiesce_entrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_entrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
stub = fop_entrylk_stub (frame, default_entrylk_resume,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2202,7 +2137,7 @@ quiesce_entrylk (call_frame_t *frame, xlator_t *this,
int32_t
quiesce_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2213,14 +2148,14 @@ quiesce_fentrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_fentrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2233,7 +2168,7 @@ int32_t
quiesce_rchecksum (call_frame_t *frame,
xlator_t *this,
fd_t *fd, off_t offset,
- int32_t len)
+ int32_t len, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2252,14 +2187,14 @@ quiesce_rchecksum (call_frame_t *frame,
quiesce_rchecksum_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rchecksum,
- fd, offset, len);
- return 0;
+ fd, offset, len, xdata);
+ return 0;
}
stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
- fd, offset, len);
+ fd, offset, len, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL);
+ STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL, NULL);
return 0;
}
@@ -2274,7 +2209,7 @@ quiesce_readdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t off)
+ off_t off, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2293,13 +2228,13 @@ quiesce_readdir (call_frame_t *frame,
quiesce_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
- return 0;
+ fd, size, off, xdata);
+ return 0;
}
- stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off);
+ stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2314,7 +2249,7 @@ quiesce_readdirp (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t off)
+ off_t off, dict_t *dict)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2327,19 +2262,21 @@ quiesce_readdirp (call_frame_t *frame,
local->fd = fd_ref (fd);
local->size = size;
local->offset = off;
+ local->dict = dict_ref (dict);
frame->local = local;
STACK_WIND (frame,
quiesce_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
- return 0;
+ fd, size, off, dict);
+ return 0;
}
- stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size, off);
+ stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size,
+ off, dict);
if (!stub) {
- STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2353,7 +2290,7 @@ quiesce_setattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2365,14 +2302,14 @@ quiesce_setattr (call_frame_t *frame,
default_setattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
- return 0;
+ loc, stbuf, valid, xdata);
+ return 0;
}
stub = fop_setattr_stub (frame, default_setattr_resume,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2385,7 +2322,7 @@ quiesce_setattr (call_frame_t *frame,
int32_t
quiesce_stat (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2402,13 +2339,13 @@ quiesce_stat (call_frame_t *frame,
quiesce_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
- stub = fop_stat_stub (frame, default_stat_resume, loc);
+ stub = fop_stat_stub (frame, default_stat_resume, loc, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2439,9 +2376,8 @@ quiesce_lookup (call_frame_t *frame,
quiesce_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
+ loc, xattr_req);
+ return 0;
}
stub = fop_lookup_stub (frame, default_lookup_resume, loc, xattr_req);
@@ -2461,7 +2397,7 @@ quiesce_fsetattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2473,14 +2409,14 @@ quiesce_fsetattr (call_frame_t *frame,
default_fsetattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid);
- return 0;
+ fd, stbuf, valid, xdata);
+ return 0;
}
stub = fop_fsetattr_stub (frame, default_fsetattr_resume,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2556,7 +2492,7 @@ notify (xlator_t *this, int event, void *data, ...)
{
int ret = 0;
quiesce_priv_t *priv = NULL;
- struct timeval timeout = {0,};
+ struct timespec timeout = {0,};
priv = this->private;
if (!priv)
@@ -2589,7 +2525,7 @@ notify (xlator_t *this, int event, void *data, ...)
if (priv->timer)
break;
timeout.tv_sec = 20;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
priv->timer = gf_timer_call_after (this->ctx,
timeout,
@@ -2663,12 +2599,10 @@ struct xlator_fops fops = {
};
-struct xlator_dumpops dumpops = {
-};
+struct xlator_dumpops dumpops;
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h
index 32b1935d9..878ed77e9 100644
--- a/xlators/features/quiesce/src/quiesce.h
+++ b/xlators/features/quiesce/src/quiesce.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUIESCE_H__
#define __QUIESCE_H__
@@ -25,7 +15,7 @@
#include "xlator.h"
#include "timer.h"
-#define GF_FOPS_EXPECTED_IN_PARALLEL 4096
+#define GF_FOPS_EXPECTED_IN_PARALLEL 512
typedef struct {
gf_timer_t *timer;
@@ -55,6 +45,7 @@ typedef struct {
entrylk_type type;
gf_xattrop_flags_t xattrop_flags;
int32_t wbflags;
+ uint32_t io_flag;
} quiesce_local_t;
#endif
diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am
index 4baa5f06e..9546f4276 100644
--- a/xlators/features/quota/src/Makefile.am
+++ b/xlators/features/quota/src/Makefile.am
@@ -1,16 +1,17 @@
xlator_LTLIBRARIES = quota.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-quota_la_LDFLAGS = -module -avoidversion
+quota_la_LDFLAGS = -module -avoid-version
quota_la_SOURCES = quota.c
quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = quota-mem-types.h quota.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/cluster/dht/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
CLEANFILES =
diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h
index da28be5b3..3082865da 100644
--- a/xlators/features/quota/src/quota-mem-types.h
+++ b/xlators/features/quota/src/quota-mem-types.h
@@ -1,30 +1,19 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUOTA_MEM_TYPES_H__
#define __QUOTA_MEM_TYPES_H__
#include "mem-types.h"
enum gf_quota_mem_types_ {
- gf_quota_mt_quota_local_t = gf_common_mt_end + 1,
- gf_quota_mt_quota_priv_t,
+ gf_quota_mt_quota_priv_t = gf_common_mt_end + 1,
gf_quota_mt_quota_inode_ctx_t,
gf_quota_mt_loc_t,
gf_quota_mt_char,
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 2790672a7..c527e7ca7 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -1,28 +1,21 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <fnmatch.h>
#include "quota.h"
#include "common-utils.h"
+#include "defaults.h"
int32_t
quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
- char *name, ino_t par);
+ char *name, uuid_t par);
struct volume_options options[];
int
@@ -36,7 +29,6 @@ quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
if (inode) {
loc->inode = inode_ref (inode);
- loc->ino = inode->ino;
}
if (parent) {
@@ -80,16 +72,15 @@ quota_inode_loc_fill (inode_t *inode, loc_t *loc)
this = THIS;
- if ((inode) && (inode->ino == 1)) {
+ if ((inode) && __is_root_gfid (inode->gfid)) {
loc->parent = NULL;
goto ignore_parent;
}
parent = inode_parent (inode, 0, NULL);
if (!parent) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot find parent for inode (ino:%"PRId64", "
- "gfid:%s)", inode->ino,
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot find parent for inode (gfid:%s)",
uuid_utoa (inode->gfid));
goto err;
}
@@ -97,9 +88,8 @@ quota_inode_loc_fill (inode_t *inode, loc_t *loc)
ignore_parent:
ret = inode_path (inode, NULL, &resolvedpath);
if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot construct path for inode (ino:%"PRId64", "
- "gfid:%s)", inode->ino,
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot construct path for inode (gfid:%s)",
uuid_utoa (inode->gfid));
goto err;
}
@@ -136,28 +126,28 @@ quota_local_cleanup (xlator_t *this, quota_local_t *local)
inode_unref (local->inode);
LOCK_DESTROY (&local->lock);
+ mem_put (local);
out:
return 0;
}
-quota_local_t *
+static inline quota_local_t *
quota_local_new ()
{
- quota_local_t *local = NULL;
- int32_t ret = 0;
-
- QUOTA_LOCAL_ALLOC_OR_GOTO (local, quota_local_t, err);
-err:
+ quota_local_t *local = NULL;
+ local = mem_get0 (THIS->local_pool);
+ if (local)
+ LOCK_INIT (&local->lock);
return local;
}
quota_dentry_t *
-__quota_dentry_new (quota_inode_ctx_t *ctx, char *name, ino_t par)
+__quota_dentry_new (quota_inode_ctx_t *ctx, char *name, uuid_t par)
{
- quota_dentry_t *dentry = NULL;
- int32_t ret = 0;
+ quota_dentry_t *dentry = NULL;
+ GF_UNUSED int32_t ret = 0;
QUOTA_ALLOC_OR_GOTO (dentry, quota_dentry_t, err);
@@ -169,7 +159,7 @@ __quota_dentry_new (quota_inode_ctx_t *ctx, char *name, ino_t par)
goto err;
}
- dentry->par = par;
+ uuid_copy (dentry->par, par);
list_add_tail (&dentry->next, &ctx->parents);
err:
@@ -195,13 +185,13 @@ out:
int32_t
quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
uint32_t validate_count = 0, link_count = 0;
int32_t ret = 0;
quota_inode_ctx_t *ctx = NULL;
- quota_priv_t *priv = NULL;
int64_t *size = 0;
uint64_t value = 0;
call_stub_t *stub = NULL;
@@ -213,9 +203,6 @@ quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
GF_ASSERT (local);
-
- priv = this->private;
-
GF_ASSERT (frame);
GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, unwind, op_errno,
EINVAL);
@@ -227,8 +214,7 @@ quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ctx = (quota_inode_ctx_t *)(unsigned long)value;
if ((ret == -1) || (ctx == NULL)) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context is not present in inode (ino:%"PRId64", "
- "gfid:%s)", local->validate_loc.inode->ino,
+ "quota context is not present in inode (gfid:%s)",
uuid_utoa (local->validate_loc.inode->gfid));
op_errno = EINVAL;
goto unwind;
@@ -253,7 +239,7 @@ quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
UNLOCK (&ctx->lock);
- quota_check_limit (frame, local->validate_loc.inode, this, NULL, 0);
+ quota_check_limit (frame, local->validate_loc.inode, this, NULL, NULL);
return 0;
unwind:
@@ -305,7 +291,7 @@ quota_timeout (struct timeval *tv, int32_t timeout)
int32_t
quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
- char *name, ino_t par)
+ char *name, uuid_t par)
{
int32_t ret = -1;
inode_t *_inode = NULL, *parent = NULL;
@@ -318,6 +304,7 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
int32_t validate_count = 0, link_count = 0;
uint64_t value = 0;
char just_validated = 0;
+ uuid_t trav_uuid = {0,};
GF_VALIDATE_OR_GOTO ("quota", this, out);
GF_VALIDATE_OR_GOTO (this->name, frame, out);
@@ -348,6 +335,10 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
}
UNLOCK (&local->lock);
+ if ( par != NULL ) {
+ uuid_copy (trav_uuid, par);
+ }
+
do {
if (ctx != NULL) {
LOCK (&ctx->lock);
@@ -376,23 +367,22 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
}
}
- if (_inode->ino == 1) {
+ if (__is_root_gfid (_inode->gfid)) {
break;
}
- parent = inode_parent (_inode, par, name);
+ parent = inode_parent (_inode, trav_uuid, name);
if (name != NULL) {
name = NULL;
- par = 0;
+ uuid_clear (trav_uuid);
}
if (parent == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot find parent for inode (ino:%"PRId64", "
- "gfid:%s), hence aborting enforcing "
- "quota-limits and continuing with the fop",
- _inode->ino, uuid_utoa (_inode->gfid));
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot find parent for inode (gfid:%s), hence "
+ "aborting enforcing quota-limits and continuing"
+ " with the fop", uuid_utoa (_inode->gfid));
}
inode_unref (_inode);
@@ -445,9 +435,8 @@ validate:
ret = quota_inode_loc_fill (_inode, &local->validate_loc);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
- "cannot fill loc for inode (ino:%"PRId64", "
- "gfid:%s), hence aborting quota-checks and "
- "continuing with the fop", _inode->ino,
+ "cannot fill loc for inode (gfid:%s), hence "
+ "aborting quota-checks and continuing with fop",
uuid_utoa (_inode->gfid));
local->validate_count--;
}
@@ -460,7 +449,7 @@ validate:
STACK_WIND (frame, quota_validate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr, &local->validate_loc,
- QUOTA_SIZE_KEY);
+ QUOTA_SIZE_KEY, NULL);
loc_fill_failed:
inode_unref (_inode);
@@ -499,6 +488,8 @@ quota_get_limit_value (inode_t *inode, xlator_t *this, int64_t *n)
}
out:
+ GF_FREE (path);
+
return ret;
}
@@ -511,14 +502,11 @@ __quota_init_inode_ctx (inode_t *inode, int64_t limit, xlator_t *this,
int32_t ret = -1;
int64_t *size = 0;
quota_inode_ctx_t *ctx = NULL;
- quota_priv_t *priv = NULL;
if (inode == NULL) {
goto out;
}
- priv = this->private;
-
QUOTA_ALLOC_OR_GOTO (ctx, quota_inode_ctx_t, out);
ctx->limit = limit;
@@ -544,8 +532,8 @@ __quota_init_inode_ctx (inode_t *inode, int64_t limit, xlator_t *this,
ret = __inode_ctx_put (inode, this, (uint64_t )(long)ctx);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
- "cannot set quota context in inode (ino:%"PRId64", "
- "gfid:%s)", inode->ino, uuid_utoa (inode->gfid));
+ "cannot set quota context in inode (gfid:%s)",
+ uuid_utoa (inode->gfid));
}
out:
return ret;
@@ -582,17 +570,20 @@ quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *dict, struct iatt *postparent)
{
- int32_t ret = -1;
- char found = 0;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *dentry = NULL;
- quota_priv_t *priv = NULL;
- int64_t *size = 0;
- uint64_t value = 0;
+ int32_t ret = -1;
+ char found = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ int64_t *size = 0;
+ uint64_t value = 0;
+ limits_t *limit_node = NULL;
+ quota_priv_t *priv = NULL;
local = frame->local;
+ priv = this->private;
+
inode_ctx_get (inode, this, &value);
ctx = (quota_inode_ctx_t *)(unsigned long)value;
@@ -603,14 +594,23 @@ quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- priv = this->private;
+ LOCK (&priv->lock);
+ {
+ list_for_each_entry (limit_node, &priv->limit_head,
+ limit_list) {
+ if (strcmp (local->loc.path, limit_node->path) == 0) {
+ uuid_copy (limit_node->gfid, buf->ia_gfid);
+ break;
+ }
+ }
+ }
+ UNLOCK (&priv->lock);
ret = quota_inode_ctx_get (local->loc.inode, local->limit, this, dict,
buf, &ctx, 1);
if ((ret == -1) || (ctx == NULL)) {
gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
- "context in inode(ino:%"PRId64", gfid:%s)",
- local->loc.inode->ino,
+ "context in inode(gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
op_ret = -1;
op_errno = ENOMEM;
@@ -639,9 +639,13 @@ quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unlock;
}
+ if (local->loc.name == NULL)
+ goto unlock;
+
list_for_each_entry (dentry, &ctx->parents, next) {
- if ((strcmp (dentry->name, local->loc.name) == 0)
- && (local->loc.parent->ino == dentry->par)) {
+ if ((strcmp (dentry->name, local->loc.name) == 0) &&
+ (uuid_compare (local->loc.parent->gfid,
+ dentry->par) == 0)) {
found = 1;
break;
}
@@ -650,15 +654,13 @@ quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!found) {
dentry = __quota_dentry_new (ctx,
(char *)local->loc.name,
- local->loc.parent->ino);
+ local->loc.parent->gfid);
if (dentry == NULL) {
/*
gf_log (this->name, GF_LOG_WARNING,
"cannot create a new dentry (par:%"
PRId64", name:%s) for inode(ino:%"
PRId64", gfid:%s)",
- local->loc.parent->ino,
- local->loc.inode->ino,
uuid_utoa (local->loc.inode->gfid));
*/
op_ret = -1;
@@ -745,12 +747,14 @@ err:
void
-quota_update_size (xlator_t *this, inode_t *inode, char *name, ino_t par,
+quota_update_size (xlator_t *this, inode_t *inode, char *name, uuid_t par,
int64_t delta)
{
- inode_t *_inode = NULL, *parent = NULL;
- uint64_t value = 0;
- quota_inode_ctx_t *ctx = NULL;
+ inode_t *_inode = NULL;
+ inode_t *parent = NULL;
+ uint64_t value = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ uuid_t trav_uuid = {0,};
GF_VALIDATE_OR_GOTO ("quota", this, out);
GF_VALIDATE_OR_GOTO (this->name, inode, out);
@@ -760,6 +764,10 @@ quota_update_size (xlator_t *this, inode_t *inode, char *name, ino_t par,
_inode = inode_ref (inode);
+ if ( par != NULL ) {
+ uuid_copy (trav_uuid, par);
+ }
+
do {
if ((ctx != NULL) && (ctx->limit >= 0)) {
LOCK (&ctx->lock);
@@ -769,22 +777,21 @@ quota_update_size (xlator_t *this, inode_t *inode, char *name, ino_t par,
UNLOCK (&ctx->lock);
}
- if (_inode->ino == 1) {
+ if (__is_root_gfid (_inode->gfid)) {
break;
}
- parent = inode_parent (_inode, par, name);
+ parent = inode_parent (_inode, trav_uuid, name);
if (parent == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot find parent for inode (ino:%"PRId64", "
- "gfid:%s), hence aborting size updation of "
- "parents",
- _inode->ino, uuid_utoa (_inode->gfid));
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot find parent for inode (gfid:%s), hence "
+ "aborting size updation of parents",
+ uuid_utoa (_inode->gfid));
}
if (name != NULL) {
name = NULL;
- par = 0;
+ uuid_clear (trav_uuid);
}
inode_unref (_inode);
@@ -806,7 +813,7 @@ out:
int32_t
quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t ret = 0;
uint64_t ctx_int = 0;
@@ -822,14 +829,18 @@ quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
ret = inode_ctx_get (local->loc.inode, this, &ctx_int);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context", local->loc.path);
+ goto out;
+ }
ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int;
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
- uuid_utoa (local->loc.inode->gfid));
+ "quota context not set in %s (gfid:%s)",
+ local->loc.path, uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -846,7 +857,8 @@ quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
out:
- QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -855,7 +867,7 @@ out:
int32_t
quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
@@ -873,11 +885,11 @@ quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
STACK_WIND (frame, quota_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- iobref);
+ flags, iobref, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -885,7 +897,7 @@ unwind:
int32_t
quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int32_t ret = -1, op_errno = EINVAL;
int32_t parents = 0;
@@ -911,14 +923,13 @@ quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64", "
- "gfid:%s)", fd->inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (fd->inode->gfid));
goto unwind;
}
stub = fop_writev_stub (frame, quota_writev_helper, fd, vector, count,
- off, iobref);
+ off, flags, iobref, xdata);
if (stub == NULL) {
op_errno = ENOMEM;
goto unwind;
@@ -967,7 +978,7 @@ quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
return 0;
unwind:
- QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -976,17 +987,17 @@ int32_t
quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
QUOTA_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode,
- buf, preparent, postparent);
+ buf, preparent, postparent, xdata);
return 0;
}
int32_t
quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dict_t *params)
+ mode_t mode, mode_t umask, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
@@ -1004,19 +1015,19 @@ quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
STACK_WIND (frame, quota_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, params);
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int32_t
quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
int32_t ret = 0, op_errno = 0;
quota_local_t *local = NULL;
@@ -1039,7 +1050,8 @@ quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto err;
}
- stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, params);
+ stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, umask,
+ xdata);
if (stub == NULL) {
op_errno = ENOMEM;
goto err;
@@ -1048,7 +1060,7 @@ quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local->stub = stub;
local->delta = 0;
- quota_check_limit (frame, loc->parent, this, NULL, 0);
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
stub = NULL;
@@ -1070,7 +1082,7 @@ quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
return 0;
err:
QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
return 0;
}
@@ -1080,7 +1092,7 @@ int32_t
quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -1095,8 +1107,8 @@ quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1);
if ((ret == -1) || (ctx == NULL)) {
gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
- "context in inode(ino:%"PRId64", gfid:%s)",
- inode->ino, uuid_utoa (inode->gfid));
+ "context in inode(gfid:%s)",
+ uuid_utoa (inode->gfid));
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
@@ -1107,13 +1119,11 @@ quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ctx->buf = *buf;
dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
- local->loc.parent->ino);
+ local->loc.parent->gfid);
if (dentry == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (par:%"
- PRId64", name:%s) for inode(ino:%"
- PRId64", gfid:%s)", local->loc.parent->ino,
- local->loc.name, local->loc.inode->ino,
+ "cannot create a new dentry (name:%s) for "
+ "inode(gfid:%s)", local->loc.name,
uuid_utoa (local->loc.inode->gfid));
op_ret = -1;
op_errno = ENOMEM;
@@ -1125,14 +1135,15 @@ unlock:
unwind:
QUOTA_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int32_t
quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
@@ -1149,20 +1160,20 @@ quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
STACK_WIND (frame, quota_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd,
- params);
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
int32_t
quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd, dict_t *params)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -1182,7 +1193,7 @@ quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
}
stub = fop_create_stub (frame, quota_create_helper, loc, flags, mode,
- fd, params);
+ umask, fd, xdata);
if (stub == NULL) {
goto err;
}
@@ -1191,7 +1202,7 @@ quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
local->stub = stub;
local->delta = 0;
- quota_check_limit (frame, loc->parent, this, NULL, 0);
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
stub = NULL;
@@ -1212,7 +1223,7 @@ quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
return 0;
err:
QUOTA_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
return 0;
}
@@ -1221,7 +1232,7 @@ err:
int32_t
quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -1238,25 +1249,25 @@ quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
quota_update_size (this, local->loc.inode, (char *)local->loc.name,
- local->loc.parent->ino,
+ local->loc.parent->gfid,
(-(ctx->buf.ia_blocks * 512)));
out:
QUOTA_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
- postparent);
+ postparent, xdata);
return 0;
}
int32_t
-quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
int32_t ret = 0;
quota_local_t *local = NULL;
@@ -1275,13 +1286,13 @@ quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
STACK_WIND (frame, quota_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
ret = 0;
err:
if (ret == -1) {
- QUOTA_STACK_UNWIND (unlink, frame, -1, 0, NULL, NULL);
+ QUOTA_STACK_UNWIND (unlink, frame, -1, 0, NULL, NULL, NULL);
}
return 0;
@@ -1292,7 +1303,7 @@ int32_t
quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -1306,14 +1317,14 @@ quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = (quota_local_t *) frame->local;
- quota_update_size (this, local->loc.parent, NULL, 0,
+ quota_update_size (this, local->loc.parent, NULL, NULL,
(buf->ia_blocks * 512));
ret = quota_inode_ctx_get (inode, -1, this, NULL, NULL, &ctx, 0);
if ((ret == -1) || (ctx == NULL)) {
gf_log (this->name, GF_LOG_WARNING, "cannot find quota "
- "context in inode(ino:%"PRId64", gfid:%s)",
- inode->ino, uuid_utoa (inode->gfid));
+ "context in %s (gfid:%s)", local->loc.path,
+ uuid_utoa (inode->gfid));
op_ret = -1;
op_errno = EINVAL;
goto out;
@@ -1322,15 +1333,14 @@ quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&ctx->lock);
{
list_for_each_entry (dentry, &ctx->parents, next) {
- if ((strcmp (dentry->name, local->loc.name) == 0)
- && (local->loc.parent->ino == dentry->par)) {
+ if ((strcmp (dentry->name, local->loc.name) == 0) &&
+ (uuid_compare (local->loc.parent->gfid,
+ dentry->par) == 0)) {
found = 1;
gf_log (this->name, GF_LOG_WARNING,
- "new entry being linked (par:%"
- PRId64", name:%s) for inode (ino:%"
- PRId64", gfid:%s) is already present "
- "in inode-dentry-list", dentry->par,
- dentry->name, local->loc.inode->ino,
+ "new entry being linked (name:%s) for "
+ "inode (gfid:%s) is already present "
+ "in inode-dentry-list", dentry->name,
uuid_utoa (local->loc.inode->gfid));
break;
}
@@ -1339,15 +1349,11 @@ quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!found) {
dentry = __quota_dentry_new (ctx,
(char *)local->loc.name,
- local->loc.parent->ino);
+ local->loc.parent->gfid);
if (dentry == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (par:%"
- PRId64", name:%s) for inode(ino:%"
- PRId64", gfid:%s)",
- local->loc.parent->ino,
- local->loc.name,
- local->loc.inode->ino,
+ "cannot create a new dentry (name:%s) "
+ "for inode(gfid:%s)", local->loc.name,
uuid_utoa (local->loc.inode->gfid));
op_ret = -1;
op_errno = ENOMEM;
@@ -1362,7 +1368,7 @@ unlock:
out:
QUOTA_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1370,7 +1376,7 @@ out:
int32_t
quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
@@ -1388,18 +1394,19 @@ quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int32_t
-quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
int32_t ret = -1, op_errno = ENOMEM;
quota_local_t *local = NULL;
@@ -1419,7 +1426,7 @@ quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
goto err;
}
- stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc);
+ stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc, xdata);
if (stub == NULL) {
goto err;
}
@@ -1431,16 +1438,15 @@ quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", oldloc->inode?oldloc->inode->ino:0,
- oldloc->inode?uuid_utoa (oldloc->inode->gfid):"0");
+ "quota context not set in inode (gfid:%s)",
+ oldloc->inode ? uuid_utoa (oldloc->inode->gfid) : "0");
op_errno = EINVAL;
goto err;
}
local->delta = ctx->buf.ia_blocks * 512;
- quota_check_limit (frame, newloc->parent, this, NULL, 0);
+ quota_check_limit (frame, newloc->parent, this, NULL, NULL);
stub = NULL;
@@ -1463,7 +1469,7 @@ quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
err:
if (ret < 0) {
QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
}
return 0;
@@ -1474,7 +1480,8 @@ int32_t
quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -1501,8 +1508,8 @@ quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (local->oldloc.parent != local->newloc.parent) {
- quota_update_size (this, local->oldloc.parent, NULL, 0, (-size));
- quota_update_size (this, local->newloc.parent, NULL, 0, size);
+ quota_update_size (this, local->oldloc.parent, NULL, NULL, (-size));
+ quota_update_size (this, local->newloc.parent, NULL, NULL, size);
}
if (!(IA_ISREG (local->oldloc.inode->ia_type)
@@ -1514,8 +1521,7 @@ quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&ctx, 0);
if ((ret == -1) || (ctx == NULL)) {
gf_log (this->name, GF_LOG_WARNING, "quota context not"
- "set in inode(ino:%"PRId64", gfid:%s)",
- local->oldloc.inode->ino,
+ "set in inode(gfid:%s)",
uuid_utoa (local->oldloc.inode->gfid));
op_ret = -1;
op_errno = EINVAL;
@@ -1531,19 +1537,19 @@ quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* should be changed to set a new context in newloc->inode.
*/
list_for_each_entry (dentry, &ctx->parents, next) {
- if ((strcmp (dentry->name, local->oldloc.name) == 0)
- && (local->oldloc.parent->ino == dentry->par)) {
+ if ((strcmp (dentry->name, local->oldloc.name) == 0) &&
+ (uuid_compare (local->oldloc.parent->gfid,
+ dentry->par) == 0)) {
old_dentry = dentry;
- } else if ((strcmp (dentry->name, local->newloc.name)
- == 0) && (local->oldloc.parent->ino
- == dentry->par)) {
+ } else if ((strcmp (dentry->name,
+ local->newloc.name) == 0) &&
+ (uuid_compare (local->oldloc.parent->gfid,
+ dentry->par) == 0)) {
new_dentry_found = 1;
gf_log (this->name, GF_LOG_WARNING,
- "new entry being linked (par:%"
- PRId64", name:%s) for inode (ino:%"
- PRId64", gfid:%s) is already present "
- "in inode-dentry-list", dentry->par,
- dentry->name, local->newloc.inode->ino,
+ "new entry being linked (name:%s) for "
+ "inode (gfid:%s) is already present "
+ "in inode-dentry-list", dentry->name,
uuid_utoa (local->newloc.inode->gfid));
break;
}
@@ -1554,22 +1560,17 @@ quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
} else {
gf_log (this->name, GF_LOG_WARNING,
"dentry corresponding to the path just renamed "
- "(par:%"PRId64", name:%s) is not present",
- local->oldloc.inode->ino, local->oldloc.name);
+ "(name:%s) is not present", local->oldloc.name);
}
if (!new_dentry_found) {
dentry = __quota_dentry_new (ctx,
(char *)local->newloc.name,
- local->newloc.parent->ino);
+ local->newloc.parent->gfid);
if (dentry == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (par:%"
- PRId64", name:%s) for inode(ino:%"
- PRId64", gfid:%s)",
- local->newloc.parent->ino,
- local->newloc.name,
- local->newloc.inode->ino,
+ "cannot create a new dentry (name:%s) "
+ "for inode(gfid:%s)", local->newloc.name,
uuid_utoa (local->newloc.inode->gfid));
op_ret = -1;
op_errno = ENOMEM;
@@ -1584,7 +1585,7 @@ unlock:
out:
QUOTA_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
+ postoldparent, prenewparent, postnewparent, xdata);
return 0;
}
@@ -1592,7 +1593,7 @@ out:
int32_t
quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
@@ -1610,19 +1611,19 @@ quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
STACK_WIND (frame, quota_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
int32_t
quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
int32_t ret = -1, op_errno = ENOMEM;
quota_local_t *local = NULL;
@@ -1648,7 +1649,8 @@ quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
goto err;
}
- stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc);
+ stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc,
+ xdata);
if (stub == NULL) {
goto err;
}
@@ -1662,11 +1664,9 @@ quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)",
- oldloc->inode ? oldloc->inode->ino:0,
+ "quota context not set in inode (gfid:%s)",
oldloc->inode ? uuid_utoa (oldloc->inode->gfid)
- :"0");
+ : "0");
op_errno = EINVAL;
goto err;
}
@@ -1675,7 +1675,7 @@ quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
local->delta = 0;
}
- quota_check_limit (frame, newloc->parent, this, NULL, 0);
+ quota_check_limit (frame, newloc->parent, this, NULL, NULL);
stub = NULL;
@@ -1698,7 +1698,7 @@ quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
err:
if (ret == -1) {
QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1709,7 +1709,7 @@ int32_t
quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int64_t size = 0;
quota_local_t *local = NULL;
@@ -1723,14 +1723,13 @@ quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
size = buf->ia_blocks * 512;
- quota_update_size (this, local->loc.parent, NULL, 0, size);
+ quota_update_size (this, local->loc.parent, NULL, NULL, size);
quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
&ctx, 1);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -1740,13 +1739,11 @@ quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ctx->buf = *buf;
dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
- local->loc.parent->ino);
+ local->loc.parent->gfid);
if (dentry == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (par:%"
- PRId64", name:%s) for inode(ino:%"
- PRId64", gfid:%s)", local->loc.parent->ino,
- local->loc.name, local->loc.inode->ino,
+ "cannot create a new dentry (name:%s) for "
+ "inode(gfid:%s)", local->loc.name,
uuid_utoa (local->loc.inode->gfid));
op_ret = -1;
op_errno = ENOMEM;
@@ -1756,7 +1753,7 @@ quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1764,7 +1761,7 @@ out:
int
quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
@@ -1781,19 +1778,20 @@ quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath,
}
STACK_WIND (frame, quota_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, params);
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int
quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
int32_t ret = -1;
int32_t op_errno = ENOMEM;
@@ -1816,7 +1814,7 @@ quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
local->link_count = 1;
stub = fop_symlink_stub (frame, quota_symlink_helper, linkpath, loc,
- params);
+ umask, xdata);
if (stub == NULL) {
goto err;
}
@@ -1824,7 +1822,7 @@ quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
local->stub = stub;
local->delta = strlen (linkpath);
- quota_check_limit (frame, loc->parent, this, NULL, 0);
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
stub = NULL;
@@ -1847,7 +1845,7 @@ quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
err:
QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
return 0;
}
@@ -1856,7 +1854,7 @@ err:
int32_t
quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
quota_local_t *local = NULL;
int64_t delta = 0;
@@ -1874,14 +1872,13 @@ quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
- quota_update_size (this, local->loc.inode, NULL, 0, delta);
+ quota_update_size (this, local->loc.inode, NULL, NULL, delta);
quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -1894,13 +1891,14 @@ quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
+ postbuf, xdata);
return 0;
}
int32_t
-quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -1919,11 +1917,11 @@ quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
}
STACK_WIND (frame, quota_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
err:
- QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1932,7 +1930,7 @@ err:
int32_t
quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
quota_local_t *local = NULL;
int64_t delta = 0;
@@ -1950,14 +1948,13 @@ quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
- quota_update_size (this, local->loc.inode, NULL, 0, delta);
+ quota_update_size (this, local->loc.inode, NULL, NULL, delta);
quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -1970,13 +1967,14 @@ quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
+ postbuf, xdata);
return 0;
}
int32_t
-quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
@@ -1989,11 +1987,11 @@ quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
local->loc.inode = inode_ref (fd->inode);
STACK_WIND (frame, quota_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
err:
- QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2028,7 +2026,7 @@ quota_send_dir_limit_to_cli (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_INFO, "str = %s", dir_limit);
- QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict);
+ QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict, NULL);
ret = 0;
@@ -2039,11 +2037,11 @@ out:
int32_t
quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
int32_t ret = 0;
- if (strcasecmp (name, "trusted.limit.list") == 0) {
+ if (name && strcasecmp (name, "trusted.limit.list") == 0) {
ret = quota_send_dir_limit_to_cli (frame, this, fd->inode,
name);
if (ret == 0) {
@@ -2052,14 +2050,14 @@ quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name);
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
return 0;
}
int32_t
quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int32_t ret = 0;
@@ -2071,14 +2069,14 @@ quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
return 0;
}
int32_t
quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2097,8 +2095,7 @@ quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_DEBUG,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2111,13 +2108,13 @@ quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&ctx->lock);
out:
- QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf);
+ QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int32_t
-quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t ret = -1;
@@ -2135,18 +2132,19 @@ quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
STACK_WIND (frame, quota_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL);
+ QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
int32_t
quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2165,8 +2163,7 @@ quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2179,13 +2176,13 @@ quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&ctx->lock);
out:
- QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
+ QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int32_t
-quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
quota_local_t *local = NULL;
@@ -2199,11 +2196,11 @@ quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
local->loc.inode = inode_ref (fd->inode);
STACK_WIND (frame, quota_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL);
+ QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2211,7 +2208,7 @@ unwind:
int32_t
quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2230,8 +2227,7 @@ quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2243,13 +2239,14 @@ quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&ctx->lock);
out:
- QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf);
+ QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf, xdata);
return 0;
}
int32_t
-quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t ret = -1;
@@ -2268,11 +2265,11 @@ quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
}
STACK_WIND (frame, quota_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2280,7 +2277,8 @@ unwind:
int32_t
quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *buf, struct iobref *iobref)
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2299,8 +2297,7 @@ quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2313,14 +2310,14 @@ quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
- buf, iobref);
+ buf, iobref, xdata);
return 0;
}
int32_t
quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
quota_local_t *local = NULL;
@@ -2334,11 +2331,12 @@ quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->loc.inode = inode_ref (fd->inode);
STACK_WIND (frame, quota_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset);
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL);
+ QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL, NULL);
return 0;
}
@@ -2346,7 +2344,7 @@ unwind:
int32_t
quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2365,8 +2363,7 @@ quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2378,13 +2375,15 @@ quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&ctx->lock);
out:
- QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+ QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
int32_t
-quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
@@ -2398,11 +2397,11 @@ quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
frame->local = local;
STACK_WIND (frame, quota_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags);
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2411,7 +2410,7 @@ unwind:
int32_t
quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2430,8 +2429,7 @@ quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_DEBUG,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2445,14 +2443,14 @@ quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
return 0;
}
int32_t
quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t ret = -1;
@@ -2471,11 +2469,11 @@ quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
STACK_WIND (frame, quota_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid);
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2483,7 +2481,7 @@ unwind:
int32_t
quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2502,8 +2500,7 @@ quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (ino:%"PRId64
- ", gfid:%s)", local->loc.inode->ino,
+ "quota context not set in inode (gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2516,14 +2513,14 @@ quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
return 0;
}
int32_t
quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
quota_local_t *local = NULL;
@@ -2537,11 +2534,609 @@ quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->loc.inode = inode_ref (fd->inode);
STACK_WIND (frame, quota_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid);
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+ ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
+ "context in inode (gfid:%s)", uuid_utoa (inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *buf;
+
+ dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot create a new dentry (name:%s) for "
+ "inode(gfid:%s)", local->loc.name,
+ uuid_utoa (local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&ctx->lock);
+
+unwind:
+ QUOTA_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+quota_mknod_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
+
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
+
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+
+int
+quota_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_mknod_stub (frame, quota_mknod_helper, loc, mode, rdev,
+ umask, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ local->link_count = 1;
+ local->stub = stub;
+ local->delta = 0;
+
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ local->link_count = 0;
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+int
+quota_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ int op_errno = EINVAL;
+ int op_ret = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict,
+ op_errno, err);
+
+ STACK_WIND (frame, quota_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+quota_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict,
+ op_errno, err);
+
+ STACK_WIND (frame, quota_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+ err:
+ QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+
+int
+quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (this, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.quota*",
+ name, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ STACK_WIND (frame, quota_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+quota_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.quota*",
+ name, op_errno, err);
+
+ STACK_WIND (frame, quota_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+ err:
+ QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ inode_t *root_inode = NULL;
+ quota_priv_t *priv = NULL;
+ uint64_t value = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ limits_t *limit_node = NULL;
+ int64_t usage = -1;
+ int64_t avail = -1;
+ int64_t blocks = 0;
+
+ root_inode = cookie;
+
+ /* This fop will fail mostly in case of client disconnect's,
+ * which is already logged. Hence, not logging here */
+ if (op_ret == -1)
+ goto unwind;
+ /*
+ * We should never get here unless quota_statfs (below) sent us a
+ * cookie, and it would only do so if the value was non-NULL. This
+ * check is therefore just routine defensive coding.
+ */
+ if (!root_inode) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "null inode, cannot adjust for quota");
+ goto unwind;
+ }
+ if (!root_inode->table || (root_inode != root_inode->table->root)) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "non-root inode, cannot adjust for quota");
+ goto unwind;
+ }
+
+ inode_ctx_get (root_inode, this, &value);
+ if (!value) {
+ goto unwind;
+ }
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ usage = (ctx->size) / buf->f_bsize;
+ priv = this->private;
+
+ list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
+ /* Notice that this only works for volume-level quota. */
+ if (strcmp (limit_node->path, "/") == 0) {
+ blocks = limit_node->value / buf->f_bsize;
+ if (usage > blocks) {
+ break;
+ }
+
+ buf->f_blocks = blocks;
+ avail = buf->f_blocks - usage;
+ if (buf->f_bfree > avail) {
+ buf->f_bfree = avail;
+ }
+ /*
+ * We have to assume that the total assigned quota
+ * won't cause us to dip into the reserved space,
+ * because dealing with the overcommitted cases is
+ * just too hairy (especially when different bricks
+ * might be using different reserved percentages and
+ * such).
+ */
+ buf->f_bavail = buf->f_bfree;
+ break;
+ }
+ }
+
+unwind:
+ if (root_inode) {
+ inode_unref(root_inode);
+ }
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ inode_t *root_inode = NULL;
+ quota_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->consider_statfs && loc->inode) {
+ root_inode = loc->inode->table->root;
+ inode_ref(root_inode);
+ STACK_WIND_COOKIE (frame, quota_statfs_cbk, root_inode,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ }
+ else {
+ /*
+ * We have to make sure that we never get to quota_statfs_cbk
+ * with a cookie that points to something other than an inode,
+ * which is exactly what would happen with STACK_UNWIND using
+ * that as a callback. Therefore, use default_statfs_cbk in
+ * this case instead.
+ *
+ * Also if the option deem-statfs is not set to "on" don't
+ * bother calculating quota limit on / in statfs_cbk.
+ */
+ if (priv->consider_statfs)
+ gf_log(this->name,GF_LOG_WARNING,
+ "missing inode, cannot adjust for quota");
+ STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ }
+ return 0;
+}
+
+
+int
+quota_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ /* TODO: fill things */
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+int
+quota_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ int ret = 0;
+
+ if (dict) {
+ ret = dict_set_uint64 (dict, QUOTA_SIZE_KEY, 0);
+ if (ret < 0) {
+ goto err;
+ }
+ }
+
+ STACK_WIND (frame, quota_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (readdirp, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+quota_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_dentry_t *dentry = NULL;
+ int64_t delta = 0;
+
+ local = frame->local;
+
+ if ((op_ret < 0) || (local == NULL)) {
+ goto out;
+ }
+
+ ret = inode_ctx_get (local->loc.inode, this, &ctx_int);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context", local->loc.path);
+ goto out;
+ }
+
+ ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int;
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in %s (gfid:%s)",
+ local->loc.path, uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK (&ctx->lock);
+
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
+ quota_update_size (this, local->loc.inode,
+ dentry->name, dentry->par, delta);
+ }
+
+out:
+ QUOTA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+quota_fallocate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
+
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret = -1, op_errno = EINVAL;
+ int32_t parents = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO ("quota", this, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->loc.inode = inode_ref (fd->inode);
+
+ ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (fd->inode->gfid));
+ goto unwind;
+ }
+
+ stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode, offset, len,
+ xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
+
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ parents++;
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+ /*
+ * Note that by using len as the delta we're assuming the range from
+ * offset to offset+len has not already been allocated. This can result
+ * in ENOSPC errors attempting to allocate an already allocated range.
+ */
+ local->delta = len;
+ local->stub = stub;
+ local->link_count = parents;
+
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ ret = quota_check_limit (frame, fd->inode, this, dentry->name,
+ dentry->par);
+ if (ret == -1) {
+ break;
+ }
+ }
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ local->link_count = 0;
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -2597,53 +3192,28 @@ quota_forget (xlator_t *this, inode_t *inode)
return 0;
}
-int
-validate_options (xlator_t *this, char **op_errstr)
-{
- int ret = 0;
- volume_opt_list_t *vol_opt = NULL;
- volume_opt_list_t *tmp;
- if (!this) {
- gf_log (this->name, GF_LOG_DEBUG, "'this' not a valid ptr");
- ret =-1;
- goto out;
- }
-
- if (list_empty (&this->volume_options))
- goto out;
-
- vol_opt = list_entry (this->volume_options.next,
- volume_opt_list_t, list);
- list_for_each_entry_safe (vol_opt, tmp, &this->volume_options, list) {
- ret = validate_xlator_volume_options_attacherr (this,
- vol_opt->given_opt,
- op_errstr);
- }
-
-out:
-
- return ret;
-}
-
-int32_t
-quota_parse_options (quota_priv_t *priv, xlator_t *this, dict_t *xl_options)
+int
+quota_parse_limits (quota_priv_t *priv, xlator_t *this, dict_t *xl_options,
+ struct list_head *old_list)
{
int32_t ret = -1;
char *str = NULL;
char *str_val = NULL;
- char *path = NULL;
+ char *path = NULL, *saveptr = NULL;
uint64_t value = 0;
- limits_t *quota_lim = NULL;
- char *def_val = NULL;
+ limits_t *quota_lim = NULL, *old = NULL;
+ char *last_colon= NULL;
ret = dict_get_str (xl_options, "limit-set", &str);
if (str) {
- path = strtok (str, ":");
+ path = strtok_r (str, ",", &saveptr);
while (path) {
- str_val = strtok (NULL, ",");
+ last_colon = strrchr (path, ':');
+ *last_colon = '\0';
+ str_val = last_colon + 1;
ret = gf_string2bytesize (str_val, &value);
if (ret != 0)
@@ -2658,51 +3228,40 @@ quota_parse_options (quota_priv_t *priv, xlator_t *this, dict_t *xl_options)
gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64,
quota_lim->path, quota_lim->value);
- list_add_tail (&quota_lim->limit_list,
- &priv->limit_head);
+ if (old_list != NULL) {
+ list_for_each_entry (old, old_list,
+ limit_list) {
+ if (strcmp (old->path, quota_lim->path)
+ == 0) {
+ uuid_copy (quota_lim->gfid,
+ old->gfid);
+ break;
+ }
+ }
+ }
+
+ LOCK (&priv->lock);
+ {
+ list_add_tail (&quota_lim->limit_list,
+ &priv->limit_head);
+ }
+ UNLOCK (&priv->lock);
- path = strtok (NULL, ":");
+ path = strtok_r (NULL, ",", &saveptr);
}
} else {
gf_log (this->name, GF_LOG_INFO,
"no \"limit-set\" option provided");
}
- if (xlator_get_volopt_info (&this->volume_options, "timeout", &def_val,
- NULL)) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of timeout"
- "not found");
- ret = -1;
- goto err;
- } else {
- if (gf_string2bytesize (def_val,(uint64_t *) &priv->timeout )) {
- gf_log (this->name, GF_LOG_ERROR, "Default value of "
- " timeout corrupt");
- ret = -1;
- goto err;
- }
- }
-
- ret = dict_get_str (xl_options, "timeout", &str);
- if (str) {
- ret = gf_string2bytesize (str, &value);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Invalid quota timout value.");
- ret = -1;
- goto err;
- } else {
- priv->timeout = (int64_t) value;
- gf_log (this->name, GF_LOG_INFO,
- "quota timeout value = %"PRId64,
- priv->timeout);
+ LOCK (&priv->lock);
+ {
+ list_for_each_entry (quota_lim, &priv->limit_head, limit_list) {
+ gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64,
+ quota_lim->path, quota_lim->value);
}
}
-
- list_for_each_entry (quota_lim, &priv->limit_head, limit_list) {
- gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64, quota_lim->path,
- quota_lim->value);
- }
+ UNLOCK (&priv->lock);
ret = 0;
err:
@@ -2733,45 +3292,152 @@ init (xlator_t *this)
INIT_LIST_HEAD (&priv->limit_head);
+ LOCK_INIT (&priv->lock);
+
this->private = priv;
- ret = quota_parse_options (priv, this, this->options);
+ ret = quota_parse_limits (priv, this, this->options, NULL);
if (ret) {
goto err;
}
+ GF_OPTION_INIT ("timeout", priv->timeout, int64, err);
+ GF_OPTION_INIT ("deem-statfs", priv->consider_statfs, bool, err);
+
+ this->local_pool = mem_pool_new (quota_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
ret = 0;
err:
return ret;
}
+void
+__quota_reconfigure_inode_ctx (xlator_t *this, inode_t *inode, limits_t *limit)
+{
+ int ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quota", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, limit, out);
+
+ ret = quota_inode_ctx_get (inode, limit->value, this, NULL, NULL, &ctx,
+ 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
+ "context in inode(gfid:%s)",
+ uuid_utoa (inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->limit = limit->value;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ return;
+}
+
+
+void
+__quota_reconfigure (xlator_t *this, inode_table_t *itable, limits_t *limit)
+{
+ inode_t *inode = NULL;
+
+ if ((this == NULL) || (itable == NULL) || (limit == NULL)) {
+ goto out;
+ }
+
+ if (!uuid_is_null (limit->gfid)) {
+ inode = inode_find (itable, limit->gfid);
+ } else {
+ inode = inode_resolve (itable, limit->path);
+ }
+
+ if (inode != NULL) {
+ __quota_reconfigure_inode_ctx (this, inode, limit);
+ }
+
+out:
+ return;
+}
+
+
int
reconfigure (xlator_t *this, dict_t *options)
{
- int32_t ret = -1;
- quota_priv_t *priv = NULL;
- limits_t *limit = NULL;
- limits_t *next = NULL;
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ limits_t *limit = NULL, *next = NULL, *new = NULL;
+ struct list_head head = {0, };
+ xlator_t *top = NULL;
+ char found = 0;
priv = this->private;
- list_for_each_entry_safe (limit, next, &priv->limit_head, limit_list) {
- list_del (&limit->limit_list);
+ INIT_LIST_HEAD (&head);
- GF_FREE (limit);
+ LOCK (&priv->lock);
+ {
+ list_splice_init (&priv->limit_head, &head);
}
+ UNLOCK (&priv->lock);
- ret = quota_parse_options (priv, this, options);
+ ret = quota_parse_limits (priv, this, options, &head);
if (ret == -1) {
gf_log ("quota", GF_LOG_WARNING,
"quota reconfigure failed, "
"new changes will not take effect");
goto out;
}
+
+ LOCK (&priv->lock);
+ {
+ top = ((glusterfs_ctx_t *)this->ctx)->active->top;
+ GF_ASSERT (top);
+
+ list_for_each_entry (limit, &priv->limit_head, limit_list) {
+ __quota_reconfigure (this, top->itable, limit);
+ }
+
+ list_for_each_entry_safe (limit, next, &head, limit_list) {
+ found = 0;
+ list_for_each_entry (new, &priv->limit_head,
+ limit_list) {
+ if (strcmp (new->path, limit->path) == 0) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ limit->value = -1;
+ __quota_reconfigure (this, top->itable, limit);
+ }
+
+ list_del_init (&limit->limit_list);
+ GF_FREE (limit);
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ GF_OPTION_RECONF ("timeout", priv->timeout, options, int64, out);
+ GF_OPTION_RECONF ("deem-statfs", priv->consider_statfs, options, bool,
+ out);
+
+ ret = 0;
out:
- return 0;
+ return ret;
}
@@ -2783,25 +3449,33 @@ fini (xlator_t *this)
struct xlator_fops fops = {
- .lookup = quota_lookup,
- .writev = quota_writev,
- .create = quota_create,
- .mkdir = quota_mkdir,
- .truncate = quota_truncate,
- .ftruncate = quota_ftruncate,
- .unlink = quota_unlink,
- .symlink = quota_symlink,
- .link = quota_link,
- .rename = quota_rename,
- .getxattr = quota_getxattr,
- .fgetxattr = quota_fgetxattr,
- .stat = quota_stat,
- .fstat = quota_fstat,
- .readlink = quota_readlink,
- .readv = quota_readv,
- .fsync = quota_fsync,
- .setattr = quota_setattr,
- .fsetattr = quota_fsetattr,
+ .statfs = quota_statfs,
+ .lookup = quota_lookup,
+ .writev = quota_writev,
+ .create = quota_create,
+ .mkdir = quota_mkdir,
+ .truncate = quota_truncate,
+ .ftruncate = quota_ftruncate,
+ .unlink = quota_unlink,
+ .symlink = quota_symlink,
+ .link = quota_link,
+ .rename = quota_rename,
+ .getxattr = quota_getxattr,
+ .fgetxattr = quota_fgetxattr,
+ .stat = quota_stat,
+ .fstat = quota_fstat,
+ .readlink = quota_readlink,
+ .readv = quota_readv,
+ .fsync = quota_fsync,
+ .setattr = quota_setattr,
+ .fsetattr = quota_fsetattr,
+ .mknod = quota_mknod,
+ .setxattr = quota_setxattr,
+ .fsetxattr = quota_fsetxattr,
+ .removexattr = quota_removexattr,
+ .fremovexattr = quota_fremovexattr,
+ .readdirp = quota_readdirp,
+ .fallocate = quota_fallocate,
};
struct xlator_cbks cbks = {
@@ -2812,9 +3486,18 @@ struct volume_options options[] = {
{.key = {"limit-set"}},
{.key = {"timeout"},
.type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 60,
.default_value = "0",
.description = "quota caches the directory sizes on client. Timeout "
"indicates the timeout for the cache to be revalidated."
},
+ {.key = {"deem-statfs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If set to on, it takes quota limits into"
+ "consideration while estimating fs size. (df command)"
+ " (Default is off)."
+ },
{.key = {NULL}}
};
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index b1e7d2192..84ecbb308 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -36,16 +26,6 @@
#define VAL_LENGTH 8
#define READDIR_BUF 4096
-#define QUOTA_STACK_DESTROY(_frame, _this) \
- do { \
- quota_local_t *_local = NULL; \
- _local = _frame->local; \
- _frame->local = NULL; \
- STACK_DESTROY (_frame->root); \
- quota_local_cleanup (_this, _local); \
- GF_FREE (_local); \
- } while (0)
-
#define QUOTA_SAFE_INCREMENT(lock, var) \
do { \
LOCK (lock); \
@@ -60,12 +40,6 @@
UNLOCK (lock); \
} while (0)
-#define QUOTA_LOCAL_ALLOC_OR_GOTO(local, type, label) \
- do { \
- QUOTA_ALLOC_OR_GOTO (local, type, label); \
- LOCK_INIT (&local->lock); \
- } while (0)
-
#define QUOTA_ALLOC_OR_GOTO(var, type, label) \
do { \
var = GF_CALLOC (sizeof (type), 1, \
@@ -89,7 +63,6 @@
} \
STACK_UNWIND_STRICT (fop, frame, params); \
quota_local_cleanup (_this, _local); \
- GF_FREE (_local); \
} while (0)
#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
@@ -125,7 +98,7 @@
struct quota_dentry {
char *name;
- ino_t par;
+ uuid_t par;
struct list_head next;
};
typedef struct quota_dentry quota_dentry_t;
@@ -160,16 +133,19 @@ struct quota_local {
typedef struct quota_local quota_local_t;
struct quota_priv {
- int64_t timeout;
- struct list_head limit_head;
+ int64_t timeout;
+ gf_boolean_t consider_statfs;
+ struct list_head limit_head;
+ gf_lock_t lock;
};
typedef struct quota_priv quota_priv_t;
struct limits {
struct list_head limit_list;
char *path;
- int64_t value;
+ int64_t value;
+ uuid_t gfid;
};
-typedef struct limits limits_t;
+typedef struct limits limits_t;
uint64_t cn = 1;
diff --git a/xlators/features/read-only/src/Makefile.am b/xlators/features/read-only/src/Makefile.am
index 31ae4f340..4c1462137 100644
--- a/xlators/features/read-only/src/Makefile.am
+++ b/xlators/features/read-only/src/Makefile.am
@@ -4,18 +4,19 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
noinst_HEADERS = read-only-common.h
-read_only_la_LDFLAGS = -module -avoidversion
+read_only_la_LDFLAGS = -module -avoid-version
read_only_la_SOURCES = read-only.c read-only-common.c
read_only_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-worm_la_LDFLAGS = -module -avoidversion
+worm_la_LDFLAGS = -module -avoid-version
worm_la_SOURCES = read-only-common.c worm.c
worm_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c
index 5e4949ee0..56a7a7176 100644
--- a/xlators/features/read-only/src/read-only-common.c
+++ b/xlators/features/read-only/src/read-only-common.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -27,220 +17,223 @@
int32_t
ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL, xdata);
return 0;
}
int32_t
ro_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL, xdata);
return 0;
}
int32_t
ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
+ entrylk_type type, dict_t *xdata)
{
- STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type)
+ fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock)
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
- struct gf_flock *flock)
+ struct gf_flock *flock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL, xdata);
return 0;
}
int32_t
ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int32_t
ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int32_t
-ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int32_t
-ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
- STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int
ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *params)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
return 0;
}
int
ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
return 0;
}
int32_t
-ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int
-ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int
ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL,
+ NULL, xdata);
return 0;
}
int32_t
-ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL, NULL,
- NULL);
+ NULL, xdata);
return 0;
}
int32_t
-ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
return 0;
}
int32_t
ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd, dict_t *params)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, xdata);
return 0;
}
static int32_t
ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
int32_t
ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
if (((flags & O_ACCMODE) == O_WRONLY) ||
((flags & O_ACCMODE) == O_RDWR)) {
- STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, xdata);
return 0;
}
STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
return 0;
}
int32_t
ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS, xdata);
return 0;
}
int32_t
-ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int32_t
ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS, xdata);
return 0;
}
diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h
index 3bc008e59..5d4c7e260 100644
--- a/xlators/features/read-only/src/read-only-common.h
+++ b/xlators/features/read-only/src/read-only-common.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -27,97 +17,99 @@
int32_t
ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict);
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
int32_t
ro_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict);
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
int32_t
ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type);
+ entrylk_type type, dict_t *xdata);
int32_t
ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type
- type);
+ type, dict_t *xdata);
int32_t
ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock);
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
int32_t
ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock);
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
int32_t
ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
- struct gf_flock *flock);
+ struct gf_flock *flock, dict_t *xdata);
int32_t
ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid);
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
int32_t
ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid);
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
int32_t
-ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset);
+ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata);
int32_t
-ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset);
+ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata);
int
ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *params);
+ dev_t rdev, mode_t umask, dict_t *xdata);
int
ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params);
+ mode_t umask, dict_t *xdata);
int32_t
-ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc);
+ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
int
-ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags);
+ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
int
ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params);
+ loc_t *loc, mode_t umask, dict_t *xdata);
int32_t
-ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc);
+ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
-ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc);
+ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd, dict_t *params);
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags);
+ fd_t *fd, dict_t *xdata);
int32_t
ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t
-ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags);
+ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata);
int32_t
ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref);
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t
ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name);
+ const char *name, dict_t *xdata);
diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c
index b11e84f24..e49e54a1b 100644
--- a/xlators/features/read-only/src/read-only.c
+++ b/xlators/features/read-only/src/read-only.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
index 790bc3234..16c3eb3da 100644
--- a/xlators/features/read-only/src/worm.c
+++ b/xlators/features/read-only/src/worm.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -28,25 +18,25 @@
static int32_t
worm_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
int32_t
worm_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
if ((((flags & O_ACCMODE) == O_WRONLY) ||
((flags & O_ACCMODE) == O_RDWR)) &&
!(flags & O_APPEND)) {
- STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, NULL);
return 0;
}
STACK_WIND (frame, worm_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
return 0;
}
@@ -75,7 +65,6 @@ fini (xlator_t *this)
}
struct xlator_fops fops = {
-
.open = worm_open,
.unlink = ro_unlink,
@@ -92,8 +81,7 @@ struct xlator_fops fops = {
.lk = ro_lk,
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
{ .key = {NULL} },
diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am
index 4671d06d3..5251eb082 100644
--- a/xlators/features/trash/src/Makefile.am
+++ b/xlators/features/trash/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = trash.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-trash_la_LDFLAGS = -module -avoidversion
+trash_la_LDFLAGS = -module -avoid-version
trash_la_SOURCES = trash.c
trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = trash.h trash-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h
index 6608abf6a..0e6ef572f 100644
--- a/xlators/features/trash/src/trash-mem-types.h
+++ b/xlators/features/trash/src/trash-mem-types.h
@@ -1,30 +1,19 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TRASH_MEM_TYPES_H__
#define __TRASH_MEM_TYPES_H__
#include "mem-types.h"
enum gf_trash_mem_types_ {
- gf_trash_mt_trash_local_t = gf_common_mt_end + 1,
- gf_trash_mt_trash_private_t,
+ gf_trash_mt_trash_private_t = gf_common_mt_end + 1,
gf_trash_mt_char,
gf_trash_mt_trash_elim_pattern_t,
gf_trash_mt_end
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index 396c1c904..addeb66a0 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -63,7 +53,7 @@ trash_local_wipe (trash_local_t *local)
if (local->newfd)
fd_unref (local->newfd);
- GF_FREE (local);
+ mem_put (local);
out:
return;
}
@@ -170,8 +160,7 @@ trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
GF_FREE (cookie);
- if (tmp_str)
- GF_FREE (tmp_str);
+ GF_FREE (tmp_str);
return 0;
}
@@ -442,8 +431,7 @@ trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
+ GF_FREE (tmp_str);
return 0;
}
@@ -504,9 +492,7 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
trash_elim_pattern_t *trav = NULL;
trash_private_t *priv = NULL;
trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- time_t utime = 0;
+ char timestr[64] = {0,};
int32_t match = 0;
priv = this->private;
@@ -533,8 +519,7 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
return 0;
}
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
TRASH_STACK_UNWIND (rename, frame, -1, ENOMEM,
@@ -554,9 +539,8 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
{
/* append timestamp to file name */
/* TODO: can we make it optional? */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
strcat (local->newpath, timestr);
}
@@ -575,9 +559,7 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
trash_elim_pattern_t *trav = NULL;
trash_private_t *priv = NULL;
trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- time_t utime = 0;
+ char timestr[64] = {0,};
int32_t match = 0;
priv = this->private;
@@ -610,8 +592,7 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
return 0;
}
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
TRASH_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL);
@@ -627,9 +608,8 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
/* append timestamp to file name */
/* TODO: can we make it optional? */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
strcat (local->newpath, timestr);
}
@@ -690,7 +670,7 @@ trash_truncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->fsize = stbuf->ia_size;
STACK_WIND (frame, trash_truncate_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- local->newfd, vector, count, local->cur_offset, iobuf);
+ local->newfd, vector, count, local->cur_offset, 0, iobuf);
out:
return 0;
@@ -723,7 +703,7 @@ trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_truncate_readv_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
local->fd, (size_t)GF_BLOCK_READV_SIZE,
- local->cur_offset);
+ local->cur_offset, 0);
goto out;
}
@@ -763,7 +743,7 @@ trash_truncate_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_truncate_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset);
+ local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0);
out:
return 0;
@@ -932,8 +912,7 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
+ GF_FREE (tmp_str);
return 0;
}
@@ -945,10 +924,8 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
trash_private_t *priv = NULL;
trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
+ char timestr[64] = {0,};
char loc_newname[PATH_MAX] = {0,};
- time_t utime = 0;
int32_t flags = 0;
priv = this->private;
@@ -980,9 +957,8 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
strcat (local->newpath, local->loc.path);
{
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
strcat (local->newpath, timestr);
}
strcpy (loc_newname,local->loc.name);
@@ -991,7 +967,6 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->newloc.name = gf_strdup (loc_newname);
local->newloc.path = gf_strdup (local->newpath);
local->newloc.inode = inode_new (local->loc.inode->table);
- local->newloc.ino = local->newloc.inode->ino;
local->newfd = fd_create (local->newloc.inode, frame->root->pid);
flags = O_CREAT|O_EXCL|O_WRONLY;
@@ -1045,8 +1020,7 @@ trash_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
LOCK_INIT (&frame->lock);
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
TRASH_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL);
@@ -1111,7 +1085,7 @@ trash_ftruncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_ftruncate_readv_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
local->fd, (size_t)GF_BLOCK_READV_SIZE,
- local->cur_offset);
+ local->cur_offset, 0);
return 0;
}
@@ -1143,7 +1117,7 @@ trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_ftruncate_writev_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
- local->newfd, vector, count, local->cur_offset, NULL);
+ local->newfd, vector, count, local->cur_offset, 0, NULL);
return 0;
}
@@ -1195,7 +1169,7 @@ trash_ftruncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_ftruncate_readv_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv, local->fd,
- (size_t)GF_BLOCK_READV_SIZE, local->cur_offset);
+ (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0);
return 0;
}
@@ -1300,8 +1274,7 @@ trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
+ GF_FREE (tmp_str);
return 0;
}
@@ -1350,11 +1323,9 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
trash_private_t *priv = NULL;
trash_local_t *local = NULL;
dentry_t *dir_entry = NULL;
- struct tm *tm = NULL;
char *pathbuf = NULL;
inode_t *newinode = NULL;
- time_t utime = 0;
- char timestr[256];
+ char timestr[64];
int32_t retval = 0;
int32_t match = 0;
@@ -1386,18 +1357,14 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
return 0;
}
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
TRASH_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
-
+ gf_time_fmt (timestr, sizeof timestr, time (NULL), gf_timefmt_F_HMS);
strcpy (local->newpath, priv->trash_dir);
strcat (local->newpath, pathbuf);
strcat (local->newpath, timestr);
@@ -1411,7 +1378,6 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
local->newloc.path = local->newpath;
local->loc.inode = inode_ref (fd->inode);
- local->loc.ino = fd->inode->ino;
local->loc.path = pathbuf;
local->fop_offset = offset;
@@ -1524,6 +1490,14 @@ init (xlator_t *this)
_priv->max_trash_file_size);
}
+ this->local_pool = mem_pool_new (trash_local_t, 64);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ return -1;
+ }
+
+
this->private = (void *)_priv;
return 0;
}
@@ -1534,8 +1508,7 @@ fini (xlator_t *this)
trash_private_t *priv = NULL;
priv = this->private;
- if (priv)
- GF_FREE (priv);
+ GF_FREE (priv);
return;
}
diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h
index d385ee346..9a7c03361 100644
--- a/xlators/features/trash/src/trash.h
+++ b/xlators/features/trash/src/trash.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TRASH_H__
#define __TRASH_H__
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c
index 3991d80ff..9e5357255 100644
--- a/xlators/lib/src/libxlator.c
+++ b/xlators/lib/src/libxlator.c
@@ -1,7 +1,44 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "mem-types.h"
#include "libxlator.h"
+int marker_xtime_default_gauge[] = {
+ [MCNT_FOUND] = 1,
+ [MCNT_NOTFOUND] = -1,
+ [MCNT_ENODATA] = -1,
+ [MCNT_ENOTCONN] = -1,
+ [MCNT_ENOENT] = -1,
+ [MCNT_EOTHER] = -1,
+};
+
+int marker_uuid_default_gauge[] = {
+ [MCNT_FOUND] = 1,
+ [MCNT_NOTFOUND] = 0,
+ [MCNT_ENODATA] = 0,
+ [MCNT_ENOTCONN] = 0,
+ [MCNT_ENOENT] = 0,
+ [MCNT_EOTHER] = 0,
+};
+
+static int marker_idx_errno_map[] = {
+ [MCNT_FOUND] = EINVAL,
+ [MCNT_NOTFOUND] = EINVAL,
+ [MCNT_ENOENT] = ENOENT,
+ [MCNT_ENOTCONN] = ENOTCONN,
+ [MCNT_ENODATA] = ENODATA,
+ [MCNT_EOTHER] = EINVAL,
+ [MCNT_MAX] = 0,
+};
+
/*Copy the contents of oldtimebuf to newtimbuf*/
static void
update_timebuf (uint32_t *oldtimbuf, uint32_t *newtimebuf)
@@ -35,139 +72,148 @@ match_uuid_local (const char *name, char *uuid)
return 0;
}
+static void
+marker_local_incr_errcount (xl_marker_local_t *local, int op_errno)
+{
+ marker_result_idx_t i = -1;
+ if (!local)
+ return;
+
+ switch (op_errno) {
+ case ENODATA:
+ i = MCNT_ENODATA;
+ break;
+ case ENOENT:
+ i = MCNT_ENOENT;
+ break;
+ case ENOTCONN:
+ i = MCNT_ENOTCONN;
+ break;
+ default:
+ i = MCNT_EOTHER;
+ break;
+ }
+ local->count[i]++;
+}
+
+static int
+evaluate_marker_results (int *gauge, int *count)
+{
+ int i = 0;
+ int op_errno = 0;
+ gf_boolean_t sane = _gf_true;
+
+ /* check if the policy of the gauge is violated;
+ * if yes, try to get the best errno, ie. look
+ * for the first position where there is a more
+ * specific kind of violation than the generic EINVAL
+ */
+ for (i = 0; i < MCNT_MAX; i++) {
+ if (sane) {
+ if ((gauge[i] > 0 && count[i] < gauge[i]) ||
+ (gauge[i] < 0 && count[i] >= -gauge[i])) {
+ sane = _gf_false;
+ /* generic action: adopt corresponding errno */
+ op_errno = marker_idx_errno_map[i];
+ }
+ } else {
+ /* already insane; trying to get a more informative
+ * errno by checking subsequent counters
+ */
+ if (count[i] > 0)
+ op_errno = marker_idx_errno_map[i];
+ }
+ if (op_errno && op_errno != EINVAL)
+ break;
+ }
+
+ return op_errno;
+}
/* Aggregate all the <volid>.xtime attrs of the cluster and send the max*/
int32_t
cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- int32_t callcnt = 0;
- int ret = -1;
- uint32_t *net_timebuf = NULL;
- uint32_t host_timebuf[2] = {0,};
- char *marker_xattr = NULL;
- struct marker_str *local = NULL;
- char *vol_uuid = NULL;
+ int32_t callcnt = 0;
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t host_timebuf[2] = {0,};
+ char *marker_xattr = NULL;
+ xl_marker_local_t *local = NULL;
+ char *vol_uuid = NULL;
+ char need_unwind = 0;
if (!this || !frame || !frame->local || !cookie) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ gf_log ("", GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
goto out;
}
local = frame->local;
if (!local || !local->vol_uuid) {
gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
goto out;
}
- if (local->esomerr) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- goto done;
- }
-
- vol_uuid = local->vol_uuid;
-
- if (op_ret && op_errno == ENODATA) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- local->enodata_count++;
- }
- goto done;
- }
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
- if (op_ret && op_errno == ENOENT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- local->enoent_count++;
- }
- goto done;
- }
+ vol_uuid = local->vol_uuid;
- if (op_ret && op_errno == ENOTCONN) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- local->enotconn_count++;
+ if (op_ret) {
+ marker_local_incr_errcount (local, op_errno);
+ goto unlock;
}
- goto done;
- }
- if (op_ret) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- local->esomerr = op_errno;
- }
- goto done;
- }
-
-
-
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (!gf_asprintf (& marker_xattr, "%s.%s.%s",
+ if (!gf_asprintf (&marker_xattr, "%s.%s.%s",
MARKER_XATTR_PREFIX, vol_uuid, XTIME)) {
op_errno = ENOMEM;
- goto done;
+ goto unlock;
}
if (dict_get_ptr (dict, marker_xattr, (void **)&net_timebuf)) {
gf_log (this->name, GF_LOG_WARNING,
"Unable to get <uuid>.xtime attr");
- local->noxtime_count++;
- goto done;
+ local->count[MCNT_NOTFOUND]++;
+ goto unlock;
}
- if (local->has_xtime) {
-
+ if (local->count[MCNT_FOUND]) {
get_hosttime (net_timebuf, host_timebuf);
if ( (host_timebuf[0]>local->host_timebuf[0]) ||
(host_timebuf[0] == local->host_timebuf[0] &&
host_timebuf[1] >= local->host_timebuf[1])) {
-
update_timebuf (net_timebuf, local->net_timebuf);
update_timebuf (host_timebuf, local->host_timebuf);
-
}
- }
- else {
+ } else {
get_hosttime (net_timebuf, local->host_timebuf);
update_timebuf (net_timebuf, local->net_timebuf);
- local->has_xtime = _gf_true;
+ local->count[MCNT_FOUND]++;
}
-
-
}
-done:
+unlock:
UNLOCK (&frame->lock);
if (!callcnt) {
-
op_ret = 0;
op_errno = 0;
- if (local->has_xtime) {
- if (!dict) {
+ need_unwind = 1;
+
+ if (local->count[MCNT_FOUND]) {
+ if (!dict)
dict = dict_new();
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- }
+
ret = dict_set_static_bin (dict, marker_xattr,
(void *)local->net_timebuf, 8);
if (ret) {
@@ -175,178 +221,133 @@ done:
op_errno = ENOMEM;
goto out;
}
- goto out;
}
- if (local->noxtime_count)
- goto out;
-
- if (local->enodata_count) {
- op_ret = -1;
- op_errno = ENODATA;
- goto out;
- }
- if (local->enotconn_count) {
- op_ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
- if (local->enoent_count) {
+ op_errno = evaluate_marker_results (local->gauge, local->count);
+ if (op_errno)
op_ret = -1;
- op_errno = ENOENT;
- goto out;
- }
- else {
- op_errno = local->esomerr;
- goto out;
- }
-out:
- if (local->xl_specf_unwind) {
- frame->local = local->xl_local;
- local->xl_specf_unwind (frame, op_ret,
- op_errno, dict);
- return 0;
- }
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ }
+out:
+ if (need_unwind && local && local->xl_specf_unwind) {
+ frame->local = local->xl_local;
+ local->xl_specf_unwind (frame, op_ret,
+ op_errno, dict, xdata);
+ } else if (need_unwind) {
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+ dict, xdata);
}
+ GF_FREE (marker_xattr);
return 0;
}
int32_t
cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- int32_t callcnt = 0;
- data_t *data = NULL;
- struct volume_mark *volmark = NULL;
- struct marker_str *marker = NULL;
- char *vol_uuid;
+ int32_t callcnt = 0;
+ struct volume_mark *volmark = NULL;
+ xl_marker_local_t *local = NULL;
+ int32_t ret = -1;
+ char need_unwind = 0;
+ char *vol_uuid = NULL;
if (!this || !frame || !cookie) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ gf_log ("", GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
goto out;
}
- marker = frame->local;
+ local = frame->local;
- if (!marker) {
+ if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
goto out;
}
- vol_uuid = marker->vol_uuid;
-
- if (op_ret && (ENOENT == op_errno)) {
- LOCK (&frame->lock);
- {
- callcnt = --marker->call_count;
- marker->enoent_count++;
- }
- goto done;
- }
-
- if (op_ret && (ENOTCONN == op_errno)) {
- LOCK (&frame->lock);
- {
- callcnt = --marker->call_count;
- marker->enotconn_count++;
- }
- goto done;
- }
-
- if (!(data = dict_get (dict, GF_XATTR_MARKER_KEY))) {
- LOCK (&frame->lock);
- {
- callcnt = --marker->call_count;
- }
- goto done;
- }
-
- volmark = (struct volume_mark *)data->data;
-
LOCK (&frame->lock);
{
- callcnt = --marker->call_count;
+ callcnt = --local->call_count;
+ vol_uuid = local->vol_uuid;
- if (marker_has_volinfo (marker)) {
+ if (op_ret) {
+ marker_local_incr_errcount (local, op_errno);
+ goto unlock;
+ }
+
+ ret = dict_get_bin (dict, GF_XATTR_MARKER_KEY,
+ (void *)&volmark);
+ if (ret)
+ goto unlock;
- if ((marker->volmark->major != volmark->major) ||
- (marker->volmark->minor != volmark->minor)) {
+ if (local->count[MCNT_FOUND]) {
+ if ((local->volmark->major != volmark->major) ||
+ (local->volmark->minor != volmark->minor)) {
op_ret = -1;
op_errno = EINVAL;
- goto done;
- }
- else if (volmark->retval) {
- data_unref ((data_t *) marker->volmark);
- marker->volmark = volmark;
- callcnt = 0;
+ goto unlock;
}
- else if ( (volmark->sec > marker->volmark->sec) ||
- ((volmark->sec == marker->volmark->sec)
- && (volmark->usec >= marker->volmark->usec))) {
- GF_FREE (marker->volmark);
- marker->volmark = memdup (volmark, sizeof (struct volume_mark));
- VALIDATE_OR_GOTO (marker->volmark, done);
+ if (local->retval)
+ goto unlock;
+ else if (volmark->retval) {
+ GF_FREE (local->volmark);
+ local->volmark =
+ memdup (volmark, sizeof (*volmark));
+ local->retval = volmark->retval;
+ } else if ((volmark->sec > local->volmark->sec) ||
+ ((volmark->sec == local->volmark->sec) &&
+ (volmark->usec >= local->volmark->usec))) {
+ GF_FREE (local->volmark);
+ local->volmark =
+ memdup (volmark, sizeof (*volmark));
}
} else {
- marker->volmark = memdup (volmark, sizeof (struct volume_mark));
- VALIDATE_OR_GOTO (marker->volmark, done);
-
+ local->volmark = memdup (volmark, sizeof (*volmark));
+ VALIDATE_OR_GOTO (local->volmark, unlock);
uuid_unparse (volmark->uuid, vol_uuid);
if (volmark->retval)
- callcnt = 0;
+ local->retval = volmark->retval;
+ local->count[MCNT_FOUND]++;
}
}
-done:
+unlock:
UNLOCK (&frame->lock);
if (!callcnt) {
op_ret = 0;
op_errno = 0;
- if (marker_has_volinfo (marker)) {
- if (!dict) {
+ need_unwind = 1;
+
+ if (local->count[MCNT_FOUND]) {
+ if (!dict)
dict = dict_new();
- if (!dict) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- }
+
if (dict_set_bin (dict, GF_XATTR_MARKER_KEY,
- marker->volmark,
+ local->volmark,
sizeof (struct volume_mark))) {
op_ret = -1;
op_errno = ENOMEM;
}
- goto out;
- }
- if (marker->enotconn_count) {
- op_ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
- if (marker->enoent_count) {
- op_ret = -1;
- op_errno = ENOENT;
}
- else {
+ op_errno = evaluate_marker_results (local->gauge, local->count);
+ if (op_errno)
op_ret = -1;
- op_errno = EINVAL;
- }
+ }
out:
- if (marker->xl_specf_unwind) {
- frame->local = marker->xl_local;
- marker->xl_specf_unwind (frame, op_ret,
- op_errno, dict);
- return 0;
- }
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ if (need_unwind && local && local->xl_specf_unwind) {
+ frame->local = local->xl_local;
+ local->xl_specf_unwind (frame, op_ret,
+ op_errno, dict, xdata);
+ return 0;
+ } else if (need_unwind){
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+ dict, xdata);
}
return 0;
}
@@ -357,10 +358,10 @@ cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
const char *name, void *xl_local,
xlator_specf_unwind_t xl_specf_getxattr_unwind,
xlator_t **sub_volumes, int count, int type,
- char *vol_uuid)
+ int *gauge, char *vol_uuid)
{
- int i;
- struct marker_str *local;
+ int i = 0;
+ xl_marker_local_t *local = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -373,33 +374,36 @@ cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
local = GF_CALLOC (sizeof (struct marker_str), 1,
gf_common_mt_libxl_marker_local);
- local->xl_local = xl_local;
- frame->local = local;
+ if (!local)
+ goto err;
+ local->xl_local = xl_local;
local->call_count = count;
-
local->xl_specf_unwind = xl_specf_getxattr_unwind;
-
local->vol_uuid = vol_uuid;
+ memcpy (local->gauge, gauge, sizeof (local->gauge));
+
+ frame->local = local;
for (i=0; i < count; i++) {
if (MARKER_UUID_TYPE == type)
STACK_WIND (frame, cluster_markeruuid_cbk,
*(sub_volumes + i),
(*(sub_volumes + i))->fops->getxattr,
- loc, name);
+ loc, name, NULL);
else if (MARKER_XTIME_TYPE == type)
STACK_WIND (frame, cluster_markerxtime_cbk,
*(sub_volumes + i),
(*(sub_volumes + i))->fops->getxattr,
- loc, name);
+ loc, name, NULL);
else {
gf_log (this->name, GF_LOG_WARNING,
- "Unrecognized type of marker attr recived");
+ "Unrecognized type (%d) of marker attr "
+ "received", type);
STACK_WIND (frame, default_getxattr_cbk,
*(sub_volumes + i),
(*(sub_volumes + i))->fops->getxattr,
- loc, name);
+ loc, name, NULL);
break;
}
}
@@ -409,3 +413,58 @@ err:
return -1;
}
+
+int
+gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t *value_timebuf = NULL;
+ uint32_t host_timebuf[2] = {0,};
+ uint32_t host_value_timebuf[2] = {0,};
+
+ /* stime should be minimum of all the other nodes */
+ ret = dict_get_bin (dst, key, (void **)&net_timebuf);
+ if (ret < 0) {
+ net_timebuf = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (!net_timebuf)
+ goto out;
+
+ ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: dict set failed", key);
+ goto error;
+ }
+ }
+
+ value_timebuf = data_to_bin (value);
+ if (!value_timebuf) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: getting value of stime failed", key);
+ ret = -1;
+ goto out;
+ }
+
+ get_hosttime (value_timebuf, host_value_timebuf);
+ get_hosttime (net_timebuf, host_timebuf);
+
+ /* can't use 'min()' macro here as we need to compare two fields
+ in the array, selectively */
+ if ((host_value_timebuf[0] > host_timebuf[0]) ||
+ ((host_value_timebuf[0] == host_timebuf[0]) &&
+ (host_value_timebuf[1] > host_timebuf[1]))) {
+ update_timebuf (value_timebuf, net_timebuf);
+ }
+
+ ret = 0;
+out:
+ return ret;
+error:
+ /* To be used only when net_timebuf is not set in the dict */
+ if (net_timebuf)
+ GF_FREE (net_timebuf);
+
+ return ret;
+}
diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h
index af7eb434a..1d5e1657f 100644
--- a/xlators/lib/src/libxlator.h
+++ b/xlators/lib/src/libxlator.h
@@ -1,3 +1,12 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _LIBXLATOR_H
#define _LIBXLATOR_H
@@ -26,7 +35,8 @@
typedef int32_t (*xlator_specf_unwind_t) (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict);
+ int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata);
struct volume_mark {
@@ -38,6 +48,69 @@ struct volume_mark {
uint32_t usec;
}__attribute__ ((__packed__));
+
+/*
+ * The enumerated type here
+ * is used to index two kinds
+ * of integer arrays:
+ * - gauges
+ * - counters
+
+ * A counter is used internally,
+ * in getxattr callbacks, to count
+ * the results, categorized as
+ * the enum names suggest. So values
+ * in the counter are always non-negative.
+
+ * Gauges are part of the API.
+ * The caller passes one to the
+ * top-level aggregator function,
+ * cluster_getmarkerattr(). The gauge
+ * defines an evaluation policy for the
+ * counter. That is, at the
+ * end of the aggregation process
+ * the gauge is matched against the
+ * counter, and the policy
+ * represented by the gauge decides
+ * whether to return with success or failure,
+ * and in latter case, what particular failure
+ * case (errno).
+
+ * The rules are the following: for some index i,
+ * - if gauge[i] == 0, no requirement is set
+ * against counter[i];
+ * - if gauge[i] > 0, counter[i] >= gauge[i]
+ * is required;
+ * - if gauge[i] < 0, counter[i] < |gauge[i]|
+ * is required.
+
+ * If the requirement is not met, then i is mapped
+ * to the respective errno (MCNT_ENOENT -> ENOENT),
+ * or in lack of that, EINVAL.
+
+ * Cf. evaluate_marker_results() and marker_idx_errno_map[]
+ * in libxlator.c
+
+ * We provide two default gauges, one intended for xtime
+ * aggregation, other for volume mark aggregation. The
+ * policies they represent agree with the hard-coded
+ * one prior to gauges. Cf. marker_xtime_default_gauge
+ * and marker_uuid_default_gauge in libxlator.c
+ */
+
+typedef enum {
+ MCNT_FOUND,
+ MCNT_NOTFOUND,
+ MCNT_ENODATA,
+ MCNT_ENOTCONN,
+ MCNT_ENOENT,
+ MCNT_EOTHER,
+ MCNT_MAX
+} marker_result_idx_t;
+
+extern int marker_xtime_default_gauge[];
+extern int marker_uuid_default_gauge[];
+
struct marker_str {
struct volume_mark *volmark;
data_t *data;
@@ -45,47 +118,36 @@ struct marker_str {
uint32_t host_timebuf[2];
uint32_t net_timebuf[2];
int32_t call_count;
- unsigned has_xtime:1;
- int32_t enoent_count;
- int32_t enotconn_count;
- int32_t enodata_count;
- int32_t noxtime_count;
-
- int esomerr;
+ int gauge[MCNT_MAX];
+ int count[MCNT_MAX];
xlator_specf_unwind_t xl_specf_unwind;
void *xl_local;
char *vol_uuid;
+ uint8_t retval;
};
-static inline gf_boolean_t
-marker_has_volinfo (struct marker_str *marker)
-{
- if (marker->volmark)
- return _gf_true;
- else
- return _gf_false;
-}
+typedef struct marker_str xl_marker_local_t;
int32_t
cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict);
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
int32_t
cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict);
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
int32_t
cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
const char *name, void *xl_local,
xlator_specf_unwind_t xl_specf_getxattr_unwind,
xlator_t **sub_volumes, int count, int type,
- char *vol_uuid);
+ int *gauge, char *vol_uuid);
int
match_uuid_local (const char *name, char *uuid);
-
-
+int
+gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value);
#endif /* !_LIBXLATOR_H */
diff --git a/xlators/meta/src/Makefile.am b/xlators/meta/src/Makefile.am
index 385ff553f..f8fa7d4cb 100644
--- a/xlators/meta/src/Makefile.am
+++ b/xlators/meta/src/Makefile.am
@@ -4,7 +4,8 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/
meta_so_SOURCES = meta.c tree.c misc.c view.c
noinst_HEADERS = meta.h tree.h misc.h view.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall
CLEANFILES =
diff --git a/xlators/meta/src/meta-mem-types.h b/xlators/meta/src/meta-mem-types.h
index 9585b7838..62028b246 100644
--- a/xlators/meta/src/meta-mem-types.h
+++ b/xlators/meta/src/meta-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_MEM_TYPES_H__
#define __META_MEM_TYPES_H__
diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c
index 412b4a2b5..e69719f3c 100644
--- a/xlators/meta/src/meta.c
+++ b/xlators/meta/src/meta.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h
index 5636487c9..73e0e50db 100644
--- a/xlators/meta/src/meta.h
+++ b/xlators/meta/src/meta.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_H__
#define __META_H__
diff --git a/xlators/meta/src/misc.c b/xlators/meta/src/misc.c
index bea07e70c..1a8dfa806 100644
--- a/xlators/meta/src/misc.c
+++ b/xlators/meta/src/misc.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <unistd.h>
#include <sys/uio.h>
diff --git a/xlators/meta/src/misc.h b/xlators/meta/src/misc.h
index f934a6d8d..30dd10e34 100644
--- a/xlators/meta/src/misc.h
+++ b/xlators/meta/src/misc.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MISC_H__
#define __MISC_H__
diff --git a/xlators/meta/src/tree.c b/xlators/meta/src/tree.c
index cad5cbd71..dacbd665a 100644
--- a/xlators/meta/src/tree.c
+++ b/xlators/meta/src/tree.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -123,7 +113,7 @@ lookup_meta_entry (meta_dirent_t *root, const char *path,
gf_asprintf (remain, "/%s/%s", *remain, piece);
else
gf_asprintf (remain, "/%s", piece);
- if (tmp) GF_FREE (tmp);
+ GF_FREE (tmp);
piece = strtok (NULL, "/");
}
}
diff --git a/xlators/meta/src/tree.h b/xlators/meta/src/tree.h
index 8157148db..985df3bd7 100644
--- a/xlators/meta/src/tree.h
+++ b/xlators/meta/src/tree.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TREE_H__
#define __TREE_H__
diff --git a/xlators/meta/src/view.c b/xlators/meta/src/view.c
index ba3c30e0b..b4e2d64a2 100644
--- a/xlators/meta/src/view.c
+++ b/xlators/meta/src/view.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/meta/src/view.h b/xlators/meta/src/view.h
index 440c0d34d..2eff6126e 100644
--- a/xlators/meta/src/view.h
+++ b/xlators/meta/src/view.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __VIEW_H__
#define __VIEW_H__
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
index 714b78e62..933c44019 100644
--- a/xlators/mgmt/glusterd/src/Makefile.am
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -1,23 +1,50 @@
xlator_LTLIBRARIES = glusterd.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
-glusterd_la_LDFLAGS = -module -avoidversion $(LIBXML2_LIBS)
-glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c glusterd-op-sm.c \
- glusterd-utils.c glusterd-rpc-ops.c glusterd-store.c glusterd-handshake.c \
- glusterd-pmap.c glusterd-volgen.c glusterd-rebalance.c
+glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) "-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\""
+glusterd_la_LDFLAGS = -module -avoid-version
+if ENABLE_BD_XLATOR
+glusterd_la_LDFLAGS += -llvm2app
+endif
+glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
+ glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
+ glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \
+ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \
+ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \
+ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \
+ glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \
+ glusterd-mgmt.c
-glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
- $(top_builddir)/rpc/xdr/src/libgfxdr.la\
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(XML_LIBS) -lcrypto
-noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h glusterd-sm.h \
- glusterd-store.h glusterd-mem-types.h glusterd-pmap.h glusterd-volgen.h
+noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
+ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
+ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \
+ glusterd-syncop.h glusterd-hooks.h glusterd-locks.h \
+ glusterd-mgmt.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)\
- -I$(rpclibdir) -L$(xlatordir)/ -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/rpc/xdr/src\
- -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/uuid -I$(top_srcdir)/contrib/md5 -DGFS_PREFIX=\"$(prefix)\" \
- -DDATADIR=\"$(localstatedir)\" -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(LIBXML2_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(CONTRIBDIR)/uuid \
+ -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \
+ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(XML_CPPFLAGS)
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+AM_LDFLAGS = -L$(xlatordir)
CLEANFILES =
+
+install-data-hook:
+
+if GF_INSTALL_VAR_LIB_GLUSTERD
+ $(mkdir_p) $(localstatedir)/lib/
+ (stat $(sysconfdir)/glusterd && \
+ mv $(sysconfdir)/glusterd $(localstatedir)/lib/) || true;
+ (ln -sf $(localstatedir)/lib/glusterd $(sysconfdir)/glusterd) || true;
+endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
new file mode 100644
index 000000000..596503c21
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -0,0 +1,1953 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "run.h"
+#include <sys/signal.h>
+
+/* misc */
+
+/* Decide, based on 'count', where in volinfo->bricks the new brick is
+ linked. 'count' is how many of the given bricks were already added.
+ The stripe formula is used when stripe_cnt is non-zero, otherwise the
+ replica formula. Always returns 0. */
+int
+add_brick_at_right_order (glusterd_brickinfo_t *brickinfo,
+ glusterd_volinfo_t *volinfo, int count,
+ int32_t stripe_cnt, int32_t replica_cnt)
+{
+ int idx = 0; /* 1-based position of the existing brick to insert next to */
+ int i = 0; /* running 1-based position while walking volinfo->bricks */
+ int sub_cnt = 0; /* volume's current dist_leaf_count or replica_count */
+ glusterd_brickinfo_t *brick = NULL;
+
+ /* The complexity of the function is in deciding at which index
+ to add the new brick. Even though it could be defined with one
+ complex formula for all volumes, it is separated out to make it
+ more readable */
+ if (stripe_cnt) {
+ /* common formula when 'stripe_count' is set */
+ /* idx = ((count / ((stripe_cnt * volinfo->replica_count) -
+ volinfo->dist_leaf_count)) * volinfo->dist_leaf_count) +
+ (count + volinfo->dist_leaf_count);
+ */
+
+ sub_cnt = volinfo->dist_leaf_count; /* NOTE(review): the divisor below is zero if stripe_cnt * replica_count == dist_leaf_count - confirm callers exclude that */
+
+ idx = ((count / ((stripe_cnt * volinfo->replica_count) -
+ sub_cnt)) * sub_cnt) +
+ (count + sub_cnt);
+
+ goto insert_brick;
+ }
+
+ /* replica count is set (reached only when stripe_cnt is zero) */
+ /* common formula when 'replica_count' is set */
+ /* idx = ((count / (replica_cnt - existing_replica_count)) *
+ existing_replica_count) +
+ (count + existing_replica_count);
+ */
+
+ sub_cnt = volinfo->replica_count; /* NOTE(review): the divisor below is zero if replica_cnt == volinfo->replica_count - confirm callers exclude that */
+ idx = (count / (replica_cnt - sub_cnt) * sub_cnt) +
+ (count + sub_cnt);
+
+insert_brick:
+ i = 0;
+ list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+ i++;
+ if (i < idx) /* skip bricks positioned before idx */
+ continue;
+ gf_log (THIS->name, GF_LOG_DEBUG, "brick:%s index=%d, count=%d",
+ brick->path, idx, count);
+
+ list_add (&brickinfo->brick_list, &brick->brick_list);
+ break; /* a single insertion per call */
+ } /* if the list holds fewer than idx bricks, nothing is inserted */
+
+ return 0;
+}
+
+/* Validate a 'stripe N' add-brick request; returns 0 = accepted, 1 = stripe count unchanged, -1 = rejected. */
+static int
+gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
+ int total_bricks, int *type, char *err_str,
+ size_t err_len)
+{
+ int ret = -1; /* rejected unless a case below decides otherwise */
+
+ switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_NONE:
+ if ((volinfo->brick_count * stripe_count) == total_bricks) {
+ /* Report the new volume type to the caller through *type */
+ *type = GF_CLUSTER_TYPE_STRIPE;
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the type of volume %s from "
+ "'distribute' to 'stripe'", volinfo->volname);
+ ret = 0;
+ goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for stripe count (%d).",
+ (total_bricks - volinfo->brick_count),
+ stripe_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ break; /* not reached: both branches above jump to out */
+ case GF_CLUSTER_TYPE_REPLICATE:
+ if (!(total_bricks % (volinfo->replica_count * stripe_count))) {
+ /* Report the new volume type to the caller through *type */
+ *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the type of volume %s from "
+ "'replicate' to 'replicate-stripe'",
+ volinfo->volname);
+ ret = 0;
+ goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for changing volume's "
+ "stripe count to %d, need at least %d bricks",
+ (total_bricks - volinfo->brick_count),
+ stripe_count,
+ (volinfo->replica_count * stripe_count));
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ break; /* not reached: both branches above jump to out */
+ case GF_CLUSTER_TYPE_STRIPE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ if (stripe_count < volinfo->stripe_count) {
+ snprintf (err_str, err_len,
+ "Incorrect stripe count (%d) supplied. "
+ "Volume already has stripe count (%d)",
+ stripe_count, volinfo->stripe_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ if (stripe_count == volinfo->stripe_count) {
+ if (!(total_bricks % volinfo->dist_leaf_count)) {
+ /* it is the same as the one which exists */
+ ret = 1;
+ goto out;
+ }
+ }
+ if (stripe_count > volinfo->stripe_count) {
+ /* We have to make sure before and after 'add-brick',
+ the number of subvolumes for distribute will remain
+ same, when stripe count is given */
+ if ((volinfo->brick_count * (stripe_count *
+ volinfo->replica_count)) ==
+ (total_bricks * volinfo->dist_leaf_count)) {
+ /* Only logged here; no volinfo field is modified */
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the stripe count of "
+ "volume %s from %d to %d",
+ volinfo->volname,
+ volinfo->stripe_count, stripe_count);
+ ret = 0;
+ goto out;
+ }
+ }
+ break; /* reached with ret still -1 and nothing written to err_str */
+ }
+
+out:
+ return ret;
+}
+/* Validate a 'replica N' add-brick request; returns 0 = accepted, 1 = replica count unchanged, -1 = rejected. */
+static int
+gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
+ int total_bricks, int *type, char *err_str,
+ int err_len) /* NOTE(review): int here, while gd_addbr_validate_stripe_count takes size_t err_len */
+{
+ int ret = -1; /* rejected unless a case below decides otherwise */
+
+ /* replica count is set */
+ switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_NONE:
+ if ((volinfo->brick_count * replica_count) == total_bricks) {
+ /* Report the new volume type to the caller through *type */
+ *type = GF_CLUSTER_TYPE_REPLICATE;
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the type of volume %s from "
+ "'distribute' to 'replica'", volinfo->volname);
+ ret = 0;
+ goto out;
+
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for replica count (%d).",
+ (total_bricks - volinfo->brick_count),
+ replica_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ break; /* not reached: both branches above jump to out */
+ case GF_CLUSTER_TYPE_STRIPE:
+ if (!(total_bricks % (volinfo->dist_leaf_count * replica_count))) {
+ /* Report the new volume type to the caller through *type */
+ *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the type of volume %s from "
+ "'stripe' to 'replicate-stripe'",
+ volinfo->volname);
+ ret = 0;
+ goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for changing volume's "
+ "replica count to %d, need at least %d "
+ "bricks",
+ (total_bricks - volinfo->brick_count),
+ replica_count, (volinfo->dist_leaf_count *
+ replica_count));
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ break; /* not reached: both branches above jump to out */
+ case GF_CLUSTER_TYPE_REPLICATE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ if (replica_count < volinfo->replica_count) {
+ snprintf (err_str, err_len,
+ "Incorrect replica count (%d) supplied. "
+ "Volume already has (%d)",
+ replica_count, volinfo->replica_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ if (replica_count == volinfo->replica_count) {
+ if (!(total_bricks % volinfo->dist_leaf_count)) {
+ ret = 1; /* same replica count as the volume already has */
+ goto out;
+ }
+ }
+ if (replica_count > volinfo->replica_count) {
+ /* We have to make sure before and after 'add-brick',
+ the number of subvolumes for distribute will remain
+ same, when replica count is given */
+ if ((total_bricks * volinfo->dist_leaf_count) ==
+ (volinfo->brick_count * (replica_count *
+ volinfo->stripe_count))) {
+ /* Only logged here; no volinfo field is modified */
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the replica count of "
+ "volume %s from %d to %d",
+ volinfo->volname, volinfo->replica_count,
+ replica_count);
+ ret = 0;
+ goto out;
+ }
+ }
+ break; /* reached with ret still -1 and nothing written to err_str */
+ }
+out:
+ return ret;
+}
+/* Validate a 'replica N' remove-brick request; returns 0 = accepted, 1 = N equals the current count, -1 = rejected. */
+static int
+gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
+ int32_t replica_count,
+ int32_t brick_count, char *err_str,
+ size_t err_len)
+{
+ int ret = -1;
+ int replica_nodes = 0; /* bricks that must go to reach the requested replica count */
+
+ switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_NONE:
+ case GF_CLUSTER_TYPE_STRIPE:
+ snprintf (err_str, err_len,
+ "replica count (%d) option given for non replicate "
+ "volume %s", replica_count, volinfo->volname);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
+ goto out;
+
+ case GF_CLUSTER_TYPE_REPLICATE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ /* in remove brick, you can only reduce the replica count */
+ if (replica_count > volinfo->replica_count) {
+ snprintf (err_str, err_len,
+ "given replica count (%d) option is more "
+ "than volume %s's replica count (%d)",
+ replica_count, volinfo->volname,
+ volinfo->replica_count);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
+ goto out;
+ }
+ if (replica_count == volinfo->replica_count) {
+ /* This means the 'replica N' option on CLI was
+ redundant. Check that the number of bricks given for
+ removal is a multiple of 'dist_leaf_count' */
+ if (brick_count % volinfo->dist_leaf_count) {
+ snprintf (err_str, err_len,
+ "number of bricks provided (%d) is "
+ "not valid. need at least %d "
+ "(or %dxN)", brick_count,
+ volinfo->dist_leaf_count,
+ volinfo->dist_leaf_count);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s",
+ err_str);
+ goto out;
+ }
+ ret = 1; /* replica count stays as it is */
+ goto out;
+ }
+
+ replica_nodes = ((volinfo->brick_count /
+ volinfo->replica_count) *
+ (volinfo->replica_count - replica_count));
+
+ if (brick_count % replica_nodes) { /* NOTE(review): replica_nodes is 0 if brick_count < replica_count in volinfo - confirm that cannot happen */
+ snprintf (err_str, err_len,
+ "need %d(xN) bricks for reducing replica "
+ "count of the volume from %d to %d",
+ replica_nodes, volinfo->replica_count,
+ replica_count);
+ goto out; /* unlike the other rejections, this one is not logged here */
+ }
+ break;
+ }
+
+ ret = 0; /* also reached for volume types with no case above */
+out:
+ return ret;
+}
+
+/* Handler functions. __glusterd_handle_add_brick decodes and validates a CLI add-brick request, then starts GD_OP_ADD_BRICK. */
+int
+__glusterd_handle_add_brick (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ char *bricks = NULL;
+ char *volname = NULL;
+ int brick_count = 0;
+ void *cli_rsp = NULL;
+ char err_str[2048] = {0,};
+ gf_cli_rsp rsp = {0,};
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int total_bricks = 0; /* bricks the volume would have after the add */
+ int32_t replica_count = 0;
+ int32_t stripe_count = 0;
+ int type = 0; /* new volume type; stays 0 unless a validator below sets it */
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf (err_str, sizeof (err_str), "Garbage args received");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Received add brick req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname); /* dict is still NULL here when dict_len was 0 */
+
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ if (!(ret = glusterd_check_volume_exists (volname))) {
+ ret = -1;
+ snprintf (err_str, sizeof (err_str), "Volume %s does not exist",
+ volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "brick count");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "replica-count", &replica_count); /* optional key: a failed lookup is not an error */
+ if (!ret) {
+ gf_log (this->name, GF_LOG_INFO, "replica-count is %d",
+ replica_count);
+ }
+
+ ret = dict_get_int32 (dict, "stripe-count", &stripe_count); /* optional key: a failed lookup is not an error */
+ if (!ret) {
+ gf_log (this->name, GF_LOG_INFO, "stripe-count is %d",
+ stripe_count);
+ }
+
+ if (!dict_get (dict, "force")) { /* NOTE(review): ret still holds the stripe-count lookup result; if that was 0, no error reply is sent at out */
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get flag");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volinfo "
+ "for volume name %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+
+ }
+
+ total_bricks = volinfo->brick_count + brick_count;
+
+ if (!stripe_count && !replica_count) {
+ if (volinfo->type == GF_CLUSTER_TYPE_NONE)
+ goto brick_val;
+
+ if ((volinfo->brick_count < volinfo->dist_leaf_count) &&
+ (total_bricks <= volinfo->dist_leaf_count))
+ goto brick_val;
+
+ if ((brick_count % volinfo->dist_leaf_count) != 0) {
+ snprintf (err_str, sizeof (err_str), "Incorrect number "
+ "of bricks supplied %d with count %d",
+ brick_count, volinfo->dist_leaf_count);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ goto brick_val;
+ /* done with validation; the section below runs only if a
+ stripe or replica count is given */
+ }
+
+ /* These bricks need to be added one per replica or stripe set */
+ if (stripe_count) {
+ ret = gd_addbr_validate_stripe_count (volinfo, stripe_count,
+ total_bricks, &type,
+ err_str,
+ sizeof (err_str));
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ /* if stripe count is same as earlier, set it back to 0 */
+ if (ret == 1)
+ stripe_count = 0;
+
+ ret = dict_set_int32 (dict, "stripe-count", stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set the stripe-count in dict");
+ goto out;
+ }
+ goto brick_val; /* replica-count is not validated when a stripe count was given */
+ }
+
+ ret = gd_addbr_validate_replica_count (volinfo, replica_count,
+ total_bricks,
+ &type, err_str,
+ sizeof (err_str));
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ /* if replica count is same as earlier, set it back to 0 */
+ if (ret == 1)
+ replica_count = 0;
+
+ ret = dict_set_int32 (dict, "replica-count", replica_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set the replica-count in dict");
+ goto out;
+ }
+
+brick_val:
+ ret = dict_get_str (dict, "bricks", &bricks); /* only checks that the key exists; the value is not used in this function */
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "bricks");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ if (type != volinfo->type) {
+ ret = dict_set_int32 (dict, "type", type);
+ if (ret) /* failure is only logged; ret is overwritten by the call below */
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set the new type in dict");
+ }
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_ADD_BRICK, dict);
+
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = 0;
+ if (err_str[0] == '\0') /* no specific message was recorded on this path */
+ snprintf (err_str, sizeof (err_str), "Operation failed");
+ rsp.op_errstr = err_str;
+ cli_rsp = &rsp;
+ glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
+ ret = 0; //sent error to cli, prevent second reply
+ }
+
+ free (cli_req.dict.dict_val); //it is malloced by xdr, hence plain free
+
+ return ret;
+}
+/* RPC entry point for add-brick: runs __glusterd_handle_add_brick through glusterd_big_locked_handler. */
+int
+glusterd_handle_add_brick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+}
+/* Allocate 'count' ints with GF_CALLOC into *subvols; returns 0 on success, -1 if the allocation yields NULL. */
+static int
+subvol_matcher_init (int **subvols, int count)
+{
+ int ret = -1; /* failure unless the allocation below succeeds */
+
+ *subvols = GF_CALLOC (count, sizeof(int), gf_gld_mt_int);
+ if (*subvols)
+ ret = 0;
+
+ return ret;
+}
+/* Find brickinfo (by hostname and path) in volinfo->bricks and bump the counter of the subvolume it falls in. */
+static void
+subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ glusterd_brickinfo_t *tmp = NULL;
+ int32_t sub_volume = 0; /* index into subvols[] of the matching brick's group */
+ int pos = 0; /* 0-based position of tmp within volinfo->bricks */
+
+ list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
+
+ if (strcmp (tmp->hostname, brickinfo->hostname) ||
+ strcmp (tmp->path, brickinfo->path)) {
+ pos++; /* not the brick we want; move to the next position */
+ continue;
+ }
+ gf_log (THIS->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK,
+ brickinfo->hostname, brickinfo->path,
+ volinfo->volname);
+ sub_volume = (pos / volinfo->dist_leaf_count); /* every dist_leaf_count consecutive bricks share one counter */
+ subvols[sub_volume]++;
+ break; /* only the first match is counted */
+ }
+
+}
+/* Check that each subvols[] counter is a multiple of dist_leaf_count; returns 0 if so, else -1 with err_str filled. */
+static int
+subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
+ size_t err_len, char *vol_type)
+{
+ int i = 0;
+ int ret = 0;
+
+ do { /* do/while: subvols[0] is examined even when subvol_count is 0 */
+
+ if (subvols[i] % volinfo->dist_leaf_count == 0) {
+ continue; /* jumps to the while condition, which advances i */
+ } else {
+ ret = -1;
+ snprintf (err_str, err_len,
+ "Bricks not from same subvol for %s", vol_type);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ break; /* stop at the first offending counter */
+ }
+ } while (++i < volinfo->subvol_count);
+
+ return ret;
+}
+/* Release the counter array by passing it to GF_FREE. */
+static void
+subvol_matcher_destroy (int *subvols)
+{
+ GF_FREE (subvols);
+}
+
+int
+__glusterd_handle_remove_brick (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ int32_t count = 0;
+ char *brick = NULL;
+ char key[256] = {0,};
+ char *brick_list = NULL;
+ int i = 1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int *subvols = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ char err_str[2048] = {0};
+ gf_cli_rsp rsp = {0,};
+ void *cli_rsp = NULL;
+ char vol_type[256] = {0,};
+ int32_t replica_count = 0;
+ int32_t brick_index = 0;
+ int32_t tmp_brick_idx = 0;
+ int found = 0;
+ int diff_count = 0;
+ char *volname = 0;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf (err_str, sizeof (err_str), "Received garbage args");
+ goto out;
+ }
+
+
+ gf_log (this->name, GF_LOG_INFO, "Received rem brick req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get brick "
+ "count");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),"Volume %s does not exist",
+ volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "replica-count", &replica_count);
+ if (!ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "request to change replica-count to %d", replica_count);
+ ret = gd_rmbr_validate_replica_count (volinfo, replica_count,
+ count, err_str,
+ sizeof (err_str));
+ if (ret < 0) {
+ /* logging and error msg are done in above function
+ itself */
+ goto out;
+ }
+ dict_del (dict, "replica-count");
+ if (ret) {
+ replica_count = 0;
+ } else {
+ ret = dict_set_int32 (dict, "replica-count",
+ replica_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the replica_count "
+ "in dict");
+ goto out;
+ }
+ }
+ }
+
+ /* 'vol_type' is used to give a meaningful error message to the user */
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ strcpy (vol_type, "replica");
+ } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE) {
+ strcpy (vol_type, "stripe");
+ } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
+ strcpy (vol_type, "stripe-replicate");
+ } else {
+ strcpy (vol_type, "distribute");
+ }
+
+ /* Do not allow remove-brick on a stripe volume whose brick count
+ equals its stripe count */
+ if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) &&
+ (volinfo->brick_count == volinfo->stripe_count)) {
+ snprintf (err_str, sizeof (err_str),
+ "Removing brick from a stripe volume is not allowed");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ if (!replica_count &&
+ (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) &&
+ (volinfo->brick_count == volinfo->dist_leaf_count)) {
+ snprintf (err_str, sizeof(err_str),
+ "Removing bricks from stripe-replicate"
+ " configuration is not allowed without reducing "
+ "replica or stripe count explicitly.");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ if (!replica_count &&
+ (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+ (volinfo->brick_count == volinfo->dist_leaf_count)) {
+ snprintf (err_str, sizeof (err_str),
+ "Removing bricks from replicate configuration "
+ "is not allowed without reducing replica count "
+ "explicitly.");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ /* When the replica count is not being changed, do not allow
+ remove-brick unless the number of bricks given is a multiple of
+ the volume's dist_leaf_count */
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
+ if (volinfo->dist_leaf_count &&
+ (count % volinfo->dist_leaf_count)) {
+ snprintf (err_str, sizeof (err_str), "Remove brick "
+ "incorrect brick count of %d for %s %d",
+ count, vol_type, volinfo->dist_leaf_count);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ brick_list = GF_MALLOC (120000 * sizeof(*brick_list),gf_common_mt_char);
+
+ if (!brick_list) {
+ ret = -1;
+ goto out;
+ }
+
+ strcpy (brick_list, " ");
+
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->subvol_count > 1)) {
+ ret = subvol_matcher_init (&subvols, volinfo->subvol_count);
+ if (ret)
+ goto out;
+ }
+
+ while ( i <= count) {
+ snprintf (key, sizeof (key), "brick%d", i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get %s",
+ key);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Remove brick count %d brick:"
+ " %s", i, brick);
+
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
+ &brickinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Incorrect brick "
+ "%s for volume %s", brick, volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ strcat(brick_list, brick);
+ strcat(brick_list, " ");
+
+ i++;
+ if ((volinfo->type == GF_CLUSTER_TYPE_NONE) ||
+ (volinfo->brick_count <= volinfo->dist_leaf_count))
+ continue;
+
+ if (replica_count) {
+ /* do the validation of bricks here */
+ /* -2 because i++ is already done, and i starts with 1,
+ instead of 0 */
+ diff_count = (volinfo->replica_count - replica_count);
+ brick_index = (((i -2) / diff_count) * volinfo->replica_count);
+ tmp_brick_idx = 0;
+ found = 0;
+ list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
+ tmp_brick_idx++;
+ gf_log (this->name, GF_LOG_TRACE,
+ "validate brick %s:%s (%d %d %d)",
+ tmp->hostname, tmp->path, tmp_brick_idx,
+ brick_index, volinfo->replica_count);
+ if (tmp_brick_idx <= brick_index)
+ continue;
+ if (tmp_brick_idx >
+ (brick_index + volinfo->replica_count))
+ break;
+ if ((!strcmp (tmp->hostname,brickinfo->hostname)) &&
+ !strcmp (tmp->path, brickinfo->path)) {
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ continue;
+
+ snprintf (err_str, sizeof (err_str), "Bricks are from "
+ "same subvol");
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to validate brick %s:%s (%d %d %d)",
+ tmp->hostname, tmp->path, tmp_brick_idx,
+ brick_index, volinfo->replica_count);
+ ret = -1;
+ /* brick order is not valid */
+ goto out;
+ }
+
+ /* Find which subvolume the brick belongs to */
+ subvol_matcher_update (subvols, volinfo, brickinfo);
+ }
+
+ /* Check if the bricks belong to the same subvolumes.*/
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->subvol_count > 1)) {
+ ret = subvol_matcher_verify (subvols, volinfo,
+ err_str, sizeof(err_str),
+ vol_type);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_REMOVE_BRICK, dict);
+
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = 0;
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ rsp.op_errstr = err_str;
+ cli_rsp = &rsp;
+ glusterd_to_cli (req, cli_rsp, NUL