diff options
author | Shehjar Tikoo <shehjart@gluster.com> | 2009-05-19 12:42:06 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2009-05-20 11:40:01 -0700 |
commit | 9a916de3f35dbbfe4399696891a0937d650bf72e (patch) | |
tree | ceae43722e677a5fbd7f31a419707be6a531c07a | |
parent | ef0af3ca33a007f2aae2016cc27b6d828367c987 (diff) |
io-threads: Support mem-pool allocator for iot_request_t
This commit brings in support for allocating iot_request_t structures
in io-threads through the use of a mem-pool. We're hoping
that the overhead of hundreds of thousands of small allocations
can be avoided through this.
The important point to note is that the memory pool is not
for the translator as a whole; instead, there is one small memory
pool for each worker thread. Not only does that help us
avoid malloc overheads for small allocations like iot_request_t,
but it also avoids contention on the heap data structures when
multiple threads want an iot_request_t from the pool at the same time.
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.c | 52 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.h | 15 |
2 files changed, 44 insertions, 23 deletions
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 813b7d73d81..a0d5d97df67 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -44,7 +44,7 @@ void _iot_queue (iot_worker_t *worker, iot_request_t *req); iot_request_t * -iot_init_request (call_stub_t *stub); +iot_init_request (iot_worker_t *conf, call_stub_t *stub); int iot_startup_workers (iot_worker_t **workers, int start_idx, int count, @@ -60,7 +60,7 @@ int iot_startup_worker (iot_worker_t *worker, iot_worker_fn workerfunc); void -iot_destroy_request (iot_request_t * req); +iot_destroy_request (iot_worker_t *worker, iot_request_t * req); /* I know this function modularizes things a bit too much, @@ -129,7 +129,7 @@ iot_schedule_unordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub) idx = iot_unordered_request_balancer (conf); selected_worker = conf->uworkers[idx]; - req = iot_init_request (stub); + req = iot_init_request (selected_worker, stub); if (req == NULL) { ret = -ENOMEM; goto out; @@ -138,7 +138,7 @@ iot_schedule_unordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub) ret = iot_request_queue_and_thread_fire (selected_worker, iot_worker_unordered, req); if (ret < 0) { - iot_destroy_request (req); + iot_destroy_request (selected_worker, req); } out: return ret; @@ -214,14 +214,6 @@ iot_schedule_ordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub) goto out; } - req = iot_init_request (stub); - if (req == NULL) { - gf_log (conf->this->name, GF_LOG_ERROR, - "out of memory"); - ret = -ENOMEM; - goto out; - } - LOCK (&inode->lock); { balstatus = iot_ordered_request_balancer (conf, inode, &idx); @@ -238,6 +230,14 @@ iot_schedule_ordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub) * added the request to the worker queue. 
*/ selected_worker = conf->oworkers[idx]; + + req = iot_init_request (selected_worker, stub); + if (req == NULL) { + gf_log (conf->this->name, GF_LOG_ERROR,"out of memory"); + ret = -ENOMEM; + goto unlock_out; + } + ret = iot_request_queue_and_thread_fire (selected_worker, iot_worker_ordered, req); @@ -248,7 +248,7 @@ unlock_out: out: if (ret < 0) { if (req != NULL) { - iot_destroy_request (req); + iot_destroy_request (selected_worker, req); } } return ret; @@ -2234,11 +2234,11 @@ _iot_queue (iot_worker_t *worker, iot_request_t *req) iot_request_t * -iot_init_request (call_stub_t *stub) +iot_init_request (iot_worker_t *worker, call_stub_t *stub) { iot_request_t *req = NULL; - req = CALLOC (1, sizeof (iot_request_t)); + req = mem_get (worker->req_pool); if (req == NULL) { goto out; } @@ -2250,12 +2250,12 @@ out: void -iot_destroy_request (iot_request_t * req) +iot_destroy_request (iot_worker_t *worker, iot_request_t * req) { - if (req == NULL) + if ((req == NULL) || (worker == NULL)) return; - FREE (req); + mem_put (worker->req_pool, req); } @@ -2356,8 +2356,7 @@ iot_dequeue_ordered (iot_worker_t *worker) } out: pthread_mutex_unlock (&worker->qlock); - - FREE (req); + iot_destroy_request (worker, req); return stub; } @@ -2482,8 +2481,7 @@ iot_dequeue_unordered (iot_worker_t *worker) } out: pthread_mutex_unlock (&worker->qlock); - - FREE (req); + iot_destroy_request (worker, req); return stub; } @@ -2526,7 +2524,7 @@ deallocate_workers (iot_worker_t **workers, end_count = count + start_alloc_idx; for (i = start_alloc_idx; (i < end_count); i++) { if (workers[i] != NULL) { - free (workers[i]); + FREE (workers[i]); workers[i] = NULL; } } @@ -2556,6 +2554,10 @@ allocate_worker (iot_conf_t * conf) goto out; } + wrk->req_pool = mem_pool_new (iot_request_t, IOT_REQUEST_MEMPOOL_SIZE); + if (wrk->req_pool == NULL) + goto free_wrk; + INIT_LIST_HEAD (&wrk->rqlist); wrk->conf = conf; pthread_cond_init (&wrk->dq_cond, NULL); @@ -2564,6 +2566,10 @@ allocate_worker (iot_conf_t * 
conf) out: return wrk; + +free_wrk: + FREE (wrk); + return NULL; } diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index 8075972b455..79c20275dd2 100644 --- a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -71,6 +71,20 @@ typedef enum { #define IOT_THREAD_STACK_SIZE ((size_t)(1024*1024)) +/* This signifies the max number of outstanding request we're expecting + * at a point for every worker thread. + * For an idea of the memory foot-print, consider at most 16 Bytes per + * iot_request_t on a 64-bit system with another 16 bytes per chunk in the + * header. For 64 slots in the pool, we'll use up 2 KiB, with 64 threads this + * goes up to 128 KiB. + * + * Note that this size defines the size of the per-worker mem pool. The + * advantage is that, we're not only reducing the rate of small iot_request_t + * allocations from the heap but also reducing the contention on the libc heap + * by having a mem pool, though small, for each worker. + */ +#define IOT_REQUEST_MEMPOOL_SIZE 64 + struct iot_worker { struct list_head rqlist; /* List of requests assigned to me. */ struct iot_conf *conf; @@ -91,6 +105,7 @@ struct iot_worker { would have been required to update centralized state inside conf. */ + struct mem_pool *req_pool; /* iot_request_t's come from here. */ }; struct iot_conf { |