diff options
Diffstat (limited to 'rpc/rpc-transport')
-rw-r--r-- | rpc/rpc-transport/rdma/src/rdma.c | 200 | ||||
-rw-r--r-- | rpc/rpc-transport/rdma/src/rdma.h | 10 |
2 files changed, 196 insertions, 14 deletions
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c index d3b9c6354b4..e6ed91e05c7 100644 --- a/rpc/rpc-transport/rdma/src/rdma.c +++ b/rpc/rpc-transport/rdma/src/rdma.c @@ -15,6 +15,7 @@ #include "dict.h" #include "glusterfs.h" +#include "iobuf.h" #include "logging.h" #include "rdma.h" #include "name.h" @@ -361,6 +362,135 @@ gf_rdma_post_recv (struct ibv_srq *srq, return ibv_post_srq_recv (srq, &wr, &bad_wr); } +int +gf_rdma_deregister_arena (struct list_head **mr_list, + struct iobuf_arena *iobuf_arena) +{ + gf_rdma_arena_mr *tmp = NULL; + int count = 0, i = 0; + + count = iobuf_arena->iobuf_pool->rdma_device_count; + for (i = 0; i < count; i++) { + list_for_each_entry(tmp, mr_list[i], list) { + if (tmp->iobuf_arena == iobuf_arena) { + if (ibv_dereg_mr(tmp->mr)) { + gf_log("rdma", GF_LOG_WARNING, + "deallocation of memory region " + "failed"); + return -1; + } + list_del(&tmp->list); + GF_FREE(tmp); + break; + } + } + } + + return 0; +} + + +int +gf_rdma_register_arena (void **arg1, void *arg2) +{ + struct ibv_mr *mr = NULL; + gf_rdma_arena_mr *new = NULL; + struct iobuf_pool *iobuf_pool = NULL; + gf_rdma_device_t **device = (gf_rdma_device_t **)arg1; + struct iobuf_arena *iobuf_arena = arg2; + int count = 0, i = 0; + + iobuf_pool = iobuf_arena->iobuf_pool; + count = iobuf_pool->rdma_device_count; + for (i = 0; i < count; i++) { + new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr), + gf_common_mt_rdma_arena_mr); + INIT_LIST_HEAD (&new->list); + new->iobuf_arena = iobuf_arena; + + mr = ibv_reg_mr(device[i]->pd, iobuf_arena->mem_base, + iobuf_arena->arena_size, + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_WRITE + ); + if (!mr) + gf_log("rdma", GF_LOG_WARNING, + "allocation of mr failed"); + + new->mr = mr; + list_add (&new->list, &device[i]->all_mr); + new = NULL; + } + + return 0; + +} + +static void +gf_rdma_register_iobuf_pool (rpc_transport_t *this) +{ + struct iobuf_pool *iobuf_pool = NULL; + struct iobuf_arena *tmp = NULL; + gf_rdma_private_t *priv = NULL; + gf_rdma_device_t *device = NULL; + struct ibv_mr *mr = NULL; + gf_rdma_arena_mr *new = NULL; + + priv = this->private; + device = priv->device; + iobuf_pool = this->ctx->iobuf_pool; + + if (!list_empty(&iobuf_pool->all_arenas)) { + + list_for_each_entry (tmp, &iobuf_pool->all_arenas, all_list) { + new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr), + gf_common_mt_rdma_arena_mr); + INIT_LIST_HEAD (&new->list); + new->iobuf_arena = tmp; + + mr = ibv_reg_mr(device->pd, tmp->mem_base, + tmp->arena_size, + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_WRITE); + if (!mr) { + gf_log ("rdma", GF_LOG_WARNING, "failed to pre" + " register buffers with rdma " + "devices."); + + } + new->mr = mr; + list_add (&new->list, &device->all_mr); + + new = NULL; + } + } + + return; +} + +static struct ibv_mr* +gf_rdma_get_pre_registred_mr(rpc_transport_t *this, void *ptr, int size) +{ + gf_rdma_arena_mr *tmp = NULL; + gf_rdma_private_t *priv = NULL; + gf_rdma_device_t *device = NULL; + + priv = this->private; + device = priv->device; + + if (!list_empty(&device->all_mr)) { + list_for_each_entry (tmp, &device->all_mr, list) { + if (tmp->iobuf_arena->mem_base <= ptr && + ptr < tmp->iobuf_arena->mem_base + + tmp->iobuf_arena->arena_size) + return tmp->mr; + } + } + + return NULL; +} static int32_t gf_rdma_create_posts (rpc_transport_t *this) @@ -510,11 +640,13 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx, int32_t i = 0; gf_rdma_device_t *trav = NULL, *device = NULL; gf_rdma_ctx_t *rdma_ctx = NULL; + struct iobuf_pool *iobuf_pool = NULL; priv = this->private; options = &priv->options; ctx = this->ctx; rdma_ctx = ctx->ib; + iobuf_pool = ctx->iobuf_pool; trav = rdma_ctx->device; @@ -530,10 +662,10 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx, if (trav == NULL) { goto out; } - priv->device = trav; trav->context = ibctx; - + iobuf_pool->device[iobuf_pool->rdma_device_count] = trav; + iobuf_pool->mr_list[iobuf_pool->rdma_device_count++] = &trav->all_mr; trav->request_ctx_pool = mem_pool_new (gf_rdma_request_context_t, GF_RDMA_POOL_SIZE); @@ -613,6 +745,9 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx, gf_rdma_queue_init (&trav->sendq); gf_rdma_queue_init (&trav->recvq); + INIT_LIST_HEAD (&trav->all_mr); + gf_rdma_register_iobuf_pool(this); + if (gf_rdma_create_posts (this) < 0) { gf_log (this->name, GF_LOG_ERROR, "could not allocate posts for device (%s)", @@ -1239,9 +1374,13 @@ __gf_rdma_create_read_chunks_from_vector (gf_rdma_peer_t *peer, readch->rc_discrim = hton32 (1); readch->rc_position = hton32 (*pos); + mr = gf_rdma_get_pre_registred_mr(peer->trans, + (void *)vector[i].iov_base, vector[i].iov_len); + if (!mr) { mr = ibv_reg_mr (device->pd, vector[i].iov_base, vector[i].iov_len, IBV_ACCESS_REMOTE_READ); + } if (!mr) { gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, "memory registration failed (%s) (peer:%s)", @@ -1374,10 +1513,16 @@ __gf_rdma_create_write_chunks_from_vector (gf_rdma_peer_t *peer, device = priv->device; for (i = 0; i < count; i++) { + + mr = gf_rdma_get_pre_registred_mr(peer->trans, + (void *)vector[i].iov_base, vector[i].iov_len); + if (!mr) { mr = ibv_reg_mr (device->pd, vector[i].iov_base, vector[i].iov_len, IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); + } + if (!mr) { gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, "memory registration failed (%s) (peer:%s)", @@ -1504,16 +1649,30 @@ out: static inline void -__gf_rdma_deregister_mr (struct ibv_mr **mr, int count) +__gf_rdma_deregister_mr (gf_rdma_device_t *device, + struct ibv_mr **mr, int count) { - int i = 0; + gf_rdma_arena_mr *tmp = NULL; + int i = 0; + int found = 0; - if (mr == NULL) { + if (mr == NULL) { goto out; } for (i = 0; i < count; i++) { - ibv_dereg_mr (mr[i]); + found = 0; + if (!list_empty(&device->all_mr)) { + list_for_each_entry(tmp, &device->all_mr, list) { + if (tmp->mr == mr[i]) { + found = 1; + break; + } + } + } + if (!found) + ibv_dereg_mr (mr[i]); + } out: @@ -1558,9 +1717,10 @@ gf_rdma_quota_put (gf_rdma_peer_t *peer) void __gf_rdma_request_context_destroy (gf_rdma_request_context_t *context) { - gf_rdma_peer_t *peer = NULL; - gf_rdma_private_t *priv = NULL; - int32_t ret = 0; + gf_rdma_peer_t *peer = NULL; + gf_rdma_private_t *priv = NULL; + gf_rdma_device_t *device = NULL; + int32_t ret = 0; if (context == NULL) { goto out; @@ -1568,9 +1728,10 @@ __gf_rdma_request_context_destroy (gf_rdma_request_context_t *context) peer = context->peer; - __gf_rdma_deregister_mr (context->mr, context->mr_count); - priv = peer->trans->private; + device = priv->device; + __gf_rdma_deregister_mr (device, context->mr, context->mr_count); + if (priv->connected) { ret = __gf_rdma_quota_put (peer); @@ -1602,13 +1763,14 @@ out: void -gf_rdma_post_context_destroy (gf_rdma_post_context_t *ctx) +gf_rdma_post_context_destroy (gf_rdma_device_t *device, + gf_rdma_post_context_t *ctx) { if (ctx == NULL) { goto out; } - __gf_rdma_deregister_mr (ctx->mr, ctx->mr_count); + __gf_rdma_deregister_mr (device, ctx->mr, ctx->mr_count); if (ctx->iobref != NULL) { iobref_unref (ctx->iobref); @@ -1640,7 +1802,7 @@ gf_rdma_post_unref (gf_rdma_post_t *post) pthread_mutex_unlock (&post->lock); if (refcount == 0) { - gf_rdma_post_context_destroy (&post->ctx); + gf_rdma_post_context_destroy (post->device, &post->ctx); if (post->type == GF_RDMA_SEND_POST) { gf_rdma_put_post (&post->device->sendq, post); } else { @@ -2060,10 +2222,16 @@ __gf_rdma_register_local_mr_for_rdma (gf_rdma_peer_t *peer, * Infiniband Architecture Specification Volume 1 * (Release 1.2.1) */ + ctx->mr[ctx->mr_count] = gf_rdma_get_pre_registred_mr( + peer->trans, (void *)vector[i].iov_base, + vector[i].iov_len); + + if (!ctx->mr[ctx->mr_count]) { ctx->mr[ctx->mr_count] = ibv_reg_mr (device->pd, vector[i].iov_base, vector[i].iov_len, IBV_ACCESS_LOCAL_WRITE); + } if (ctx->mr[ctx->mr_count] == NULL) { gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, "registering memory for IBV_ACCESS_LOCAL_WRITE " @@ -4553,6 +4721,7 @@ int32_t init (rpc_transport_t *this) { gf_rdma_private_t *priv = NULL; + struct iobuf_pool *iobuf_pool = NULL; priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_rdma_private_t); if (!priv) @@ -4565,6 +4734,9 @@ init (rpc_transport_t *this) "Failed to initialize IB Device"); return -1; } + iobuf_pool = this->ctx->iobuf_pool; + iobuf_pool->rdma_registration = gf_rdma_register_arena; + iobuf_pool->rdma_deregistration = gf_rdma_deregister_arena; return 0; } diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h index 7f76244f071..fda01aa53ef 100644 --- a/rpc/rpc-transport/rdma/src/rdma.h +++ b/rpc/rpc-transport/rdma/src/rdma.h @@ -34,6 +34,7 @@ /* FIXME: give appropriate values to these macros */ #define GF_DEFAULT_RDMA_LISTEN_PORT (GF_DEFAULT_BASE_PORT + 1) + /* If you are changing GF_RDMA_MAX_SEGMENTS, please make sure to update * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h . */ @@ -328,9 +329,18 @@ struct __gf_rdma_device { struct mem_pool *request_ctx_pool; struct mem_pool *ioq_pool; struct mem_pool *reply_info_pool; + struct list_head all_mr; }; typedef struct __gf_rdma_device gf_rdma_device_t; + +struct __gf_rdma_arena_mr { + struct list_head list; + struct iobuf_arena *iobuf_arena; + struct ibv_mr *mr; +}; + +typedef struct __gf_rdma_arena_mr gf_rdma_arena_mr; struct __gf_rdma_ctx { gf_rdma_device_t *device; struct rdma_event_channel *rdma_cm_event_channel; |