diff options
author | Shehjar Tikoo <shehjart@gluster.com> | 2009-05-19 12:42:06 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2009-05-20 11:40:01 -0700 |
commit | 9a916de3f35dbbfe4399696891a0937d650bf72e (patch) | |
tree | ceae43722e677a5fbd7f31a419707be6a531c07a /xlators | |
parent | ef0af3ca33a007f2aae2016cc27b6d828367c987 (diff) |
io-threads: Support mem-pool allocator for iot_request_t
This commit adds support for allocating iot_request_t objects
in io-threads from a mem-pool. We're hoping
that this avoids the overhead of making hundreds or thousands
of small allocations.
The important point to note is that the memory pool is not
shared by the translator as a whole; instead, there is one small
memory pool for each worker thread. Not only does that help us
avoid malloc overheads for small allocations like iot_request_t,
but it also avoids contention on the heap data structures when multiple
threads want an iot_request_t from the pool at the same time.
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.c | 52 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.h | 15 |
2 files changed, 44 insertions, 23 deletions
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 813b7d73d81..a0d5d97df67 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -44,7 +44,7 @@ void _iot_queue (iot_worker_t *worker, iot_request_t *req); iot_request_t * -iot_init_request (call_stub_t *stub); +iot_init_request (iot_worker_t *conf, call_stub_t *stub); int iot_startup_workers (iot_worker_t **workers, int start_idx, int count, @@ -60,7 +60,7 @@ int iot_startup_worker (iot_worker_t *worker, iot_worker_fn workerfunc); void -iot_destroy_request (iot_request_t * req); +iot_destroy_request (iot_worker_t *worker, iot_request_t * req); /* I know this function modularizes things a bit too much, @@ -129,7 +129,7 @@ iot_schedule_unordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub) idx = iot_unordered_request_balancer (conf); selected_worker = conf->uworkers[idx]; - req = iot_init_request (stub); + req = iot_init_request (selected_worker, stub); if (req == NULL) { ret = -ENOMEM; goto out; @@ -138,7 +138,7 @@ iot_schedule_unordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub) ret = iot_request_queue_and_thread_fire (selected_worker, iot_worker_unordered, req); if (ret < 0) { - iot_destroy_request (req); + iot_destroy_request (selected_worker, req); } out: return ret; @@ -214,14 +214,6 @@ iot_schedule_ordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub) goto out; } - req = iot_init_request (stub); - if (req == NULL) { - gf_log (conf->this->name, GF_LOG_ERROR, - "out of memory"); - ret = -ENOMEM; - goto out; - } - LOCK (&inode->lock); { balstatus = iot_ordered_request_balancer (conf, inode, &idx); @@ -238,6 +230,14 @@ iot_schedule_ordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub) * added the request to the worker queue. 
*/ selected_worker = conf->oworkers[idx]; + + req = iot_init_request (selected_worker, stub); + if (req == NULL) { + gf_log (conf->this->name, GF_LOG_ERROR,"out of memory"); + ret = -ENOMEM; + goto unlock_out; + } + ret = iot_request_queue_and_thread_fire (selected_worker, iot_worker_ordered, req); @@ -248,7 +248,7 @@ unlock_out: out: if (ret < 0) { if (req != NULL) { - iot_destroy_request (req); + iot_destroy_request (selected_worker, req); } } return ret; @@ -2234,11 +2234,11 @@ _iot_queue (iot_worker_t *worker, iot_request_t *req) iot_request_t * -iot_init_request (call_stub_t *stub) +iot_init_request (iot_worker_t *worker, call_stub_t *stub) { iot_request_t *req = NULL; - req = CALLOC (1, sizeof (iot_request_t)); + req = mem_get (worker->req_pool); if (req == NULL) { goto out; } @@ -2250,12 +2250,12 @@ out: void -iot_destroy_request (iot_request_t * req) +iot_destroy_request (iot_worker_t *worker, iot_request_t * req) { - if (req == NULL) + if ((req == NULL) || (worker == NULL)) return; - FREE (req); + mem_put (worker->req_pool, req); } @@ -2356,8 +2356,7 @@ iot_dequeue_ordered (iot_worker_t *worker) } out: pthread_mutex_unlock (&worker->qlock); - - FREE (req); + iot_destroy_request (worker, req); return stub; } @@ -2482,8 +2481,7 @@ iot_dequeue_unordered (iot_worker_t *worker) } out: pthread_mutex_unlock (&worker->qlock); - - FREE (req); + iot_destroy_request (worker, req); return stub; } @@ -2526,7 +2524,7 @@ deallocate_workers (iot_worker_t **workers, end_count = count + start_alloc_idx; for (i = start_alloc_idx; (i < end_count); i++) { if (workers[i] != NULL) { - free (workers[i]); + FREE (workers[i]); workers[i] = NULL; } } @@ -2556,6 +2554,10 @@ allocate_worker (iot_conf_t * conf) goto out; } + wrk->req_pool = mem_pool_new (iot_request_t, IOT_REQUEST_MEMPOOL_SIZE); + if (wrk->req_pool == NULL) + goto free_wrk; + INIT_LIST_HEAD (&wrk->rqlist); wrk->conf = conf; pthread_cond_init (&wrk->dq_cond, NULL); @@ -2564,6 +2566,10 @@ allocate_worker (iot_conf_t * 
conf) out: return wrk; + +free_wrk: + FREE (wrk); + return NULL; } diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index 8075972b455..79c20275dd2 100644 --- a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -71,6 +71,20 @@ typedef enum { #define IOT_THREAD_STACK_SIZE ((size_t)(1024*1024)) +/* This signifies the max number of outstanding request we're expecting + * at a point for every worker thread. + * For an idea of the memory foot-print, consider at most 16 Bytes per + * iot_request_t on a 64-bit system with another 16 bytes per chunk in the + * header. For 64 slots in the pool, we'll use up 2 KiB, with 64 threads this + * goes up to 128 KiB. + * + * Note that this size defines the size of the per-worker mem pool. The + * advantage is that, we're not only reducing the rate of small iot_request_t + * allocations from the heap but also reducing the contention on the libc heap + * by having a mem pool, though small, for each worker. + */ +#define IOT_REQUEST_MEMPOOL_SIZE 64 + struct iot_worker { struct list_head rqlist; /* List of requests assigned to me. */ struct iot_conf *conf; @@ -91,6 +105,7 @@ struct iot_worker { would have been required to update centralized state inside conf. */ + struct mem_pool *req_pool; /* iot_request_t's come from here. */ }; struct iot_conf { |