diff options
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 144 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 6 |
3 files changed, 23 insertions, 131 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 5d6737f72df..2cbd0ce4c90 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -741,7 +741,7 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode) uuid_copy (gfid_copy, inode->gfid); } - if (hashmode > 1) { + if (hashmode > 1 && inode->ia_type != IA_IFDIR) { /* * Why getpid? Because it's one of the cheapest calls * available - faster than gethostname etc. - and returns a @@ -2218,6 +2218,8 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd) goto out; } + fd_ctx->readdir_subvol = -1; + pthread_mutex_init (&fd_ctx->delay_lock, NULL); INIT_LIST_HEAD (&fd_ctx->eager_locked); diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 41f5e60032d..af6a1787593 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -124,132 +124,6 @@ out: } -#define BACKEND_D_OFF_BITS 63 -#define PRESENT_D_OFF_BITS 63 - -#define ONE 1ULL -#define MASK (~0ULL) -#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) -#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) - -#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) -#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) - -static uint64_t -afr_bits_for (uint64_t num) -{ - uint64_t bits = 0, ctrl = 1; - - while (ctrl < num) { - ctrl *= 2; - bits ++; - } - - return bits; -} - -int -afr_itransform (xlator_t *this, int subvol, uint64_t x, uint64_t *y_p) -{ - afr_private_t *conf = NULL; - int cnt = 0; - int max = 0; - uint64_t y = 0; - uint64_t hi_mask = 0; - uint64_t off_mask = 0; - int max_bits = 0; - - if (x == ((uint64_t) -1)) { - y = (uint64_t) -1; - goto out; - } - - conf = this->private; - if (!conf) - goto out; - - max = conf->child_count; - cnt = subvol; - - if (max == 1) { - y = x; - goto out; - } - - max_bits = afr_bits_for (max); - - hi_mask = ~(PRESENT_MASK >> (max_bits + 1)); - - if (x & hi_mask) { - /* HUGE d_off */ - off_mask = MASK << max_bits; - y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt; - } else { - /* small d_off */ - y = ((x * max) + cnt); - } - -out: - if (y_p) - *y_p = y; - - return 0; -} - - -int -afr_deitransform (xlator_t *this, uint64_t y, int *subvol_p, - uint64_t *x_p) -{ - afr_private_t *conf = NULL; - int cnt = 0; - int max = 0; - uint64_t x = 0; - int subvol = 0; - int max_bits = 0; - uint64_t off_mask = 0; - uint64_t host_mask = 0; - - if (!this->private) - return -1; - - conf = this->private; - max = conf->child_count; - - if (max == 1) { - x = y; - cnt = 0; - goto out; - } - - if (y & TOP_BIT) { - /* HUGE d_off */ - max_bits = afr_bits_for (max); - off_mask = (MASK << max_bits); - host_mask = ~(off_mask); - - x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS; - - cnt = y & host_mask; - } else { - /* small d_off */ - cnt = y % max; - x = y / max; - } - -out: - subvol = cnt; - - if (subvol_p) - *subvol_p = subvol; - - if (x_p) - *x_p = x; - - return 0; -} - - static void afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol, gf_dirent_t *entries, fd_t *fd) @@ -273,7 +147,6 @@ afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol, } list_del_init (&entry->list); - afr_itransform (THIS, subvol, entry->d_off, &entry->d_off); list_add_tail (&entry->list, &entries->list); if (entry->inode) { @@ -333,9 +206,11 @@ afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol) { afr_local_t *local = NULL; afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; priv = this->private; local = frame->local; + fd_ctx = afr_fd_ctx_get (local->fd, this); if (subvol == -1) { AFR_STACK_UNWIND (readdir, frame, local->op_ret, @@ -343,6 +218,8 @@ afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol) return 0; } + fd_ctx->readdir_subvol = subvol; + if (local->op == GF_FOP_READDIR) STACK_WIND_COOKIE (frame, afr_readdir_cbk, (void *) (long) subvol, @@ -370,18 +247,27 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, afr_local_t *local = NULL; int32_t op_errno = 0; int subvol = -1; + afr_fd_ctx_t *fd_ctx = NULL; local = AFR_FRAME_INIT (frame, op_errno); if (!local) goto out; + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) { + op_errno = EINVAL; + goto out; + } + local->op = whichop; local->fd = fd_ref (fd); local->cont.readdir.size = size; local->cont.readdir.offset = offset; local->xdata_req = (dict)? dict_ref (dict) : NULL; - if (offset == 0) { + subvol = fd_ctx->readdir_subvol; + + if (offset == 0 || subvol == -1) { /* First readdir has option of failing over and selecting an appropriate read subvolume */ afr_read_txn (frame, this, fd->inode, afr_readdir_wind, @@ -389,8 +275,6 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } else { /* But continued readdirs MUST stick to the same subvolume without an option to failover */ - afr_deitransform (this, offset, &subvol, - (uint64_t *)&local->cont.readdir.offset); afr_readdir_wind (frame, this, subvol); } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 7e138c54ec0..4044fd59d4e 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -301,6 +301,12 @@ typedef struct { /* list of frames currently in progress */ struct list_head eager_locked; + + /* the subvolume on which the latest sequence of readdirs (starting + at offset 0) has begun. Till the next readdir request with 0 offset + arrives, we continue to read off this subvol. + */ + int readdir_subvol; } afr_fd_ctx_t; |