diff options
author | Poornima G <pgurusid@redhat.com> | 2018-11-21 12:09:39 +0530 |
---|---|---|
committer | Amar Tumballi <amarts@redhat.com> | 2018-12-18 09:35:24 +0000 |
commit | b87c397091bac6a4a6dec4e45a7671fad4a11770 (patch) | |
tree | 6f7eeff5be2ae69af0eba03add10103091639a6c /rpc | |
parent | d50f22e6ae410fdcde573b6015b97dc1573bbb7e (diff) |
iobuf: Get rid of pre allocated iobuf_pool and use per thread mem pool
The current implementation of iobuf_pool has two problems:
- prealloc of 12.5MB of memory; this limits the scale factor of the gluster
processes due to RAM requirements
- lock contention, as the current implementation has one global
iobuf_pool lock. Credit for debugging and addressing the same goes to
Krutika Dhananjay <kdhananj@redhat.com>. Issue: #410
Hence changing the iobuf implementation to use per thread mem pool.
This may theoretically appear to cause a perf dip, as there is no preallocation.
But per thread mem pool will not have significant perf impact as the last
allocated memory is kept alive for subsequent allocs, for some time.
The worst case would be if iobufs requested are of random sizes each time.
The best case is if we get iobuf requests of the same size. From the perf
tests, this patch did not seem to cause any perf decrease.
Note that, with this patch, the rdma performance is going to degrade
drastically. In one of the previous patchsets we had fixes to not
degrade rdma perf, but rdma is not supported and also not tested [1].
Hence the decision was to not have code in rdma that is not tested
and not supported.
[1] https://lists.gluster.org/pipermail/gluster-users.old/2018-July/034400.html
Updates: #325
Change-Id: Ic2ef3bd498f9250dea25f25ba0c01fde19584b27
Signed-off-by: Poornima G <pgurusid@redhat.com>
Diffstat (limited to 'rpc')
-rw-r--r-- | rpc/rpc-transport/rdma/src/rdma.c | 251 | ||||
-rw-r--r-- | rpc/rpc-transport/rdma/src/rdma.h | 1 |
2 files changed, 4 insertions, 248 deletions
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c index 45605c8ff53..ac984c16924 100644 --- a/rpc/rpc-transport/rdma/src/rdma.c +++ b/rpc/rpc-transport/rdma/src/rdma.c @@ -344,207 +344,6 @@ gf_rdma_post_recv(struct ibv_srq *srq, gf_rdma_post_t *post) return ibv_post_srq_recv(srq, &wr, &bad_wr); } -static void -gf_rdma_deregister_iobuf_pool(gf_rdma_device_t *device) -{ - gf_rdma_arena_mr *arena_mr = NULL; - gf_rdma_arena_mr *tmp = NULL; - - while (device) { - pthread_mutex_lock(&device->all_mr_lock); - { - if (!list_empty(&device->all_mr)) { - list_for_each_entry_safe(arena_mr, tmp, &device->all_mr, list) - { - if (ibv_dereg_mr(arena_mr->mr)) { - gf_msg("rdma", GF_LOG_WARNING, 0, - RDMA_MSG_DEREGISTER_ARENA_FAILED, - "deallocation of memory region " - "failed"); - pthread_mutex_unlock(&device->all_mr_lock); - return; - } - list_del(&arena_mr->list); - GF_FREE(arena_mr); - } - } - } - pthread_mutex_unlock(&device->all_mr_lock); - - device = device->next; - } -} - -int -gf_rdma_deregister_arena(struct list_head **mr_list, - struct iobuf_arena *iobuf_arena) -{ - gf_rdma_arena_mr *tmp = NULL; - gf_rdma_arena_mr *dummy = NULL; - gf_rdma_device_t *device = NULL; - int count = 0, i = 0; - - count = iobuf_arena->iobuf_pool->rdma_device_count; - for (i = 0; i < count; i++) { - device = iobuf_arena->iobuf_pool->device[i]; - pthread_mutex_lock(&device->all_mr_lock); - { - list_for_each_entry_safe(tmp, dummy, mr_list[i], list) - { - if (tmp->iobuf_arena == iobuf_arena) { - if (ibv_dereg_mr(tmp->mr)) { - gf_msg("rdma", GF_LOG_WARNING, 0, - RDMA_MSG_DEREGISTER_ARENA_FAILED, - "deallocation of memory region " - "failed"); - pthread_mutex_unlock(&device->all_mr_lock); - return -1; - } - list_del(&tmp->list); - GF_FREE(tmp); - break; - } - } - } - pthread_mutex_unlock(&device->all_mr_lock); - } - - return 0; -} - -int -gf_rdma_register_arena(void **arg1, void *arg2) -{ - struct ibv_mr *mr = NULL; - gf_rdma_arena_mr *new = NULL; - struct iobuf_pool 
*iobuf_pool = NULL; - gf_rdma_device_t **device = (gf_rdma_device_t **)arg1; - struct iobuf_arena *iobuf_arena = arg2; - int count = 0, i = 0; - - iobuf_pool = iobuf_arena->iobuf_pool; - count = iobuf_pool->rdma_device_count; - for (i = 0; i < count; i++) { - new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr), - gf_common_mt_rdma_arena_mr); - if (new == NULL) { - gf_msg("rdma", GF_LOG_INFO, ENOMEM, RDMA_MSG_MR_ALOC_FAILED, - "Out of " - "memory: registering pre allocated buffer " - "with rdma device failed."); - return -1; - } - INIT_LIST_HEAD(&new->list); - new->iobuf_arena = iobuf_arena; - - mr = ibv_reg_mr(device[i]->pd, iobuf_arena->mem_base, - iobuf_arena->arena_size, - IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE); - if (!mr) - gf_msg("rdma", GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED, - "allocation of mr " - "failed"); - - new->mr = mr; - pthread_mutex_lock(&device[i]->all_mr_lock); - { - list_add(&new->list, &device[i]->all_mr); - } - pthread_mutex_unlock(&device[i]->all_mr_lock); - new = NULL; - } - - return 0; -} - -static void -gf_rdma_register_iobuf_pool(gf_rdma_device_t *device, - struct iobuf_pool *iobuf_pool) -{ - struct iobuf_arena *tmp = NULL; - struct iobuf_arena *dummy = NULL; - struct ibv_mr *mr = NULL; - gf_rdma_arena_mr *new = NULL; - - if (!list_empty(&iobuf_pool->all_arenas)) { - list_for_each_entry_safe(tmp, dummy, &iobuf_pool->all_arenas, all_list) - { - new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr), - gf_common_mt_rdma_arena_mr); - if (new == NULL) { - gf_msg("rdma", GF_LOG_INFO, ENOMEM, RDMA_MSG_MR_ALOC_FAILED, - "Out of " - "memory: registering pre allocated " - "buffer with rdma device failed."); - return; - } - INIT_LIST_HEAD(&new->list); - new->iobuf_arena = tmp; - - mr = ibv_reg_mr(device->pd, tmp->mem_base, tmp->arena_size, - IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE); - if (!mr) { - gf_msg("rdma", GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED, - "failed" - " to pre register 
buffers with rdma " - "devices."); - } - new->mr = mr; - pthread_mutex_lock(&device->all_mr_lock); - { - list_add(&new->list, &device->all_mr); - } - pthread_mutex_unlock(&device->all_mr_lock); - - new = NULL; - } - } - - return; -} - -static void -gf_rdma_register_iobuf_pool_with_device(gf_rdma_device_t *device, - struct iobuf_pool *iobuf_pool) -{ - while (device) { - gf_rdma_register_iobuf_pool(device, iobuf_pool); - device = device->next; - } -} - -static struct ibv_mr * -gf_rdma_get_pre_registred_mr(rpc_transport_t *this, void *ptr, int size) -{ - gf_rdma_arena_mr *tmp = NULL; - gf_rdma_arena_mr *dummy = NULL; - gf_rdma_private_t *priv = NULL; - gf_rdma_device_t *device = NULL; - - priv = this->private; - device = priv->device; - - pthread_mutex_lock(&device->all_mr_lock); - { - if (!list_empty(&device->all_mr)) { - list_for_each_entry_safe(tmp, dummy, &device->all_mr, list) - { - if (tmp->iobuf_arena->mem_base <= ptr && - ptr < tmp->iobuf_arena->mem_base + - tmp->iobuf_arena->arena_size) { - pthread_mutex_unlock(&device->all_mr_lock); - return tmp->mr; - } - } - } - } - pthread_mutex_unlock(&device->all_mr_lock); - - return NULL; -} - static int32_t gf_rdma_create_posts(rpc_transport_t *this) { @@ -693,13 +492,11 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx, int32_t i = 0; gf_rdma_device_t *trav = NULL, *device = NULL; gf_rdma_ctx_t *rdma_ctx = NULL; - struct iobuf_pool *iobuf_pool = NULL; priv = this->private; options = &priv->options; ctx = this->ctx; rdma_ctx = ctx->ib; - iobuf_pool = ctx->iobuf_pool; trav = rdma_ctx->device; @@ -720,8 +517,6 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx, trav->next = rdma_ctx->device; rdma_ctx->device = trav; - iobuf_pool->device[iobuf_pool->rdma_device_count] = trav; - iobuf_pool->mr_list[iobuf_pool->rdma_device_count++] = &trav->all_mr; trav->request_ctx_pool = mem_pool_new(gf_rdma_request_context_t, GF_RDMA_POOL_SIZE); if (trav->request_ctx_pool == NULL) { @@ -799,7 
+594,6 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx, INIT_LIST_HEAD(&trav->all_mr); pthread_mutex_init(&trav->all_mr_lock, NULL); - gf_rdma_register_iobuf_pool(trav, iobuf_pool); if (gf_rdma_create_posts(this) < 0) { gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_ALOC_POST_FAILED, @@ -1435,12 +1229,8 @@ __gf_rdma_create_read_chunks_from_vector(gf_rdma_peer_t *peer, readch->rc_discrim = hton32(1); readch->rc_position = hton32(*pos); - mr = gf_rdma_get_pre_registred_mr( - peer->trans, (void *)vector[i].iov_base, vector[i].iov_len); - if (!mr) { - mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len, - IBV_ACCESS_REMOTE_READ); - } + mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len, + IBV_ACCESS_REMOTE_READ); if (!mr) { gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno, RDMA_MSG_MR_ALOC_FAILED, @@ -1561,13 +1351,8 @@ __gf_rdma_create_write_chunks_from_vector( device = priv->device; for (i = 0; i < count; i++) { - mr = gf_rdma_get_pre_registred_mr( - peer->trans, (void *)vector[i].iov_base, vector[i].iov_len); - if (!mr) { - mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len, - IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); - } - + mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len, + IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); if (!mr) { gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno, RDMA_MSG_MR_ALOC_FAILED, @@ -2248,9 +2033,6 @@ __gf_rdma_register_local_mr_for_rdma(gf_rdma_peer_t *peer, struct iovec *vector, * Infiniband Architecture Specification Volume 1 * (Release 1.2.1) */ - ctx->mr[ctx->mr_count] = gf_rdma_get_pre_registred_mr( - peer->trans, (void *)vector[i].iov_base, vector[i].iov_len); - if (!ctx->mr[ctx->mr_count]) { ctx->mr[ctx->mr_count] = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len, @@ -4764,7 +4546,6 @@ init(rpc_transport_t *this) { gf_rdma_private_t *priv = NULL; gf_rdma_ctx_t *rdma_ctx = NULL; - struct iobuf_pool *iobuf_pool = NULL; priv = 
GF_CALLOC(1, sizeof(*priv), gf_common_mt_rdma_private_t); if (!priv) @@ -4783,18 +4564,6 @@ init(rpc_transport_t *this) if (!rdma_ctx) return -1; - pthread_mutex_lock(&rdma_ctx->lock); - { - if (this->dl_handle && (++(rdma_ctx->dlcount)) == 1) { - iobuf_pool = this->ctx->iobuf_pool; - iobuf_pool->rdma_registration = gf_rdma_register_arena; - iobuf_pool->rdma_deregistration = gf_rdma_deregister_arena; - gf_rdma_register_iobuf_pool_with_device(rdma_ctx->device, - iobuf_pool); - } - } - pthread_mutex_unlock(&rdma_ctx->lock); - return 0; } @@ -4826,7 +4595,6 @@ fini(struct rpc_transport *this) { /* TODO: verify this function does graceful finish */ gf_rdma_private_t *priv = NULL; - struct iobuf_pool *iobuf_pool = NULL; gf_rdma_ctx_t *rdma_ctx = NULL; priv = this->private; @@ -4845,17 +4613,6 @@ fini(struct rpc_transport *this) if (!rdma_ctx) return; - pthread_mutex_lock(&rdma_ctx->lock); - { - if (this->dl_handle && (--(rdma_ctx->dlcount)) == 0) { - iobuf_pool = this->ctx->iobuf_pool; - gf_rdma_deregister_iobuf_pool(rdma_ctx->device); - iobuf_pool->rdma_registration = NULL; - iobuf_pool->rdma_deregistration = NULL; - } - } - pthread_mutex_unlock(&rdma_ctx->lock); - return; } diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h index 403f5678ad8..34a32070e49 100644 --- a/rpc/rpc-transport/rdma/src/rdma.h +++ b/rpc/rpc-transport/rdma/src/rdma.h @@ -325,7 +325,6 @@ typedef struct __gf_rdma_device gf_rdma_device_t; struct __gf_rdma_arena_mr { struct list_head list; - struct iobuf_arena *iobuf_arena; struct ibv_mr *mr; }; |