diff options
author | Dan Lambright <dlambrig@redhat.com> | 2015-02-18 14:49:50 -0500 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-18 04:47:41 -0700 |
commit | a216745e5db3fdb4fa8d625c971e70f8d0e34d23 (patch) | |
tree | c1165dbc612ec7121bd1734cb9bb006f9ac7c9d3 /xlators/cluster/ec | |
parent | 38ccaaf9d1a93c4fc6d733ee3bd5c73e5457bdab (diff) |
cluster/dht: Change the subvolume encoding in d_off to be a "global"
position in the graph rather than relative (local) to a particular
translator.
Encoding the volume in this way allows a single translator to manage
which brick is currently being scanned for directory entries. Using a
single translator minimizes allocated bits in the d_off. It also allows
multiple DHT translators in the same graph to have a common frame of
reference (the graph position) for which brick is being read. Multiple
DHT translators are needed for the Tiering feature.
The fix builds off a previous change (9332) which removed subvolume
encoding from AFR. The fix makes an equivalent change to the EC
translator.
More background can be found in fix 9332 and gluster-dev discussions [1].
DHT and AFR/EC are responsibile (as before) for choosing which brick to
enumerate directory entries in over the readdir lifecycle.
The client translator receiving the readdir fop encodes the dht_t. It
is referred to as the "leaf node" in the graph and corresponds to the
brick being scanned.
When DHT decodes the d_off, it translates the leaf node to a local
subvolume, which represents the next node in the graph leading to
the brick.
Tracking of leaf nodes is done in common utility functions. Leaf nodes
counts and positional information are updated on a graph switch.
[1] www.gluster.org/pipermail/gluster-devel/2015-January/043592.html
Change-Id: Iaf0ea86d7046b1ceadbad69d88707b243077ebc8
BUG: 1190734
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: http://review.gluster.org/9688
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/ec')
-rw-r--r-- | xlators/cluster/ec/src/ec-dir-read.c | 18 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.c | 46 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.h | 2 |
3 files changed, 13 insertions, 53 deletions
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index c705b80fe82..17e1a3d124e 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ -304,8 +304,6 @@ void ec_adjust_readdir(ec_t * ec, int32_t idx, gf_dirent_t * entries) list_for_each_entry(entry, &entries->list, list) { - entry->d_off = ec_itransform(ec, idx, entry->d_off); - if (entry->d_stat.ia_type == IA_IFREG) { if ((entry->dict == NULL) || @@ -413,10 +411,20 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state) if (fop->offset != 0) { - int32_t idx; + int32_t idx = -1; + ec_t *ec = fop->xl->private; + + idx = gf_deitransform(fop->xl, fop->offset); + + if ((idx < 0) || (idx >= ec->nodes)) { - fop->offset = ec_deitransform(fop->xl->private, &idx, - fop->offset); + gf_log(fop->xl->name, GF_LOG_ERROR, + "Invalid index %d in readdirp request", idx); + + fop->error = EIO; + + return EC_STATE_REPORT; + } fop->mask &= 1ULL << idx; } diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 783e3d475ce..139957b55c6 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -16,17 +16,6 @@ #include "ec-fops.h" #include "ec-helpers.h" -#define BACKEND_D_OFF_BITS 63 -#define PRESENT_D_OFF_BITS 63 - -#define ONE 1ULL -#define MASK (~0ULL) -#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) -#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) - -#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) -#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) - #ifndef ffsll #define ffsll(x) __builtin_ffsll(x) #endif @@ -106,41 +95,6 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...) } } -uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset) -{ - int32_t bits; - - if (offset == -1ULL) - { - return -1ULL; - } - - bits = ec->bits_for_nodes; - if ((offset & ~(PRESENT_MASK >> (bits + 1))) != 0) - { - return TOP_BIT | ((offset >> SHIFT_BITS) & (MASK << bits)) | idx; - } - - return (offset * ec->nodes) + idx; -} - -uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset) -{ - uint64_t mask = 0; - - if ((offset & TOP_BIT) != 0) - { - mask = MASK << ec->bits_for_nodes; - - *idx = offset & ~mask; - return ((offset & ~TOP_BIT) & mask) << SHIFT_BITS; - } - - *idx = offset % ec->nodes; - - return offset / ec->nodes; -} - int32_t ec_bits_count(uint64_t n) { n -= (n >> 1) & 0x5555555555555555ULL; diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h index 5f5d9382532..11d2707b3c0 100644 --- a/xlators/cluster/ec/src/ec-helpers.h +++ b/xlators/cluster/ec/src/ec-helpers.h @@ -16,8 +16,6 @@ const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits); const char * ec_fop_name(int32_t id); void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...); -uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset); -uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset); int32_t ec_bits_count(uint64_t n); int32_t ec_bits_index(uint64_t n); int32_t ec_bits_consume(uint64_t * n); |