From 37653efdc7681d1b0f255054ec2f9c9ddd4c8b14 Mon Sep 17 00:00:00 2001 From: Amar Tumballi Date: Fri, 4 Jan 2019 07:04:50 +0000 Subject: Revert "iobuf: Get rid of pre allocated iobuf_pool and use per thread mem pool" This reverts commit b87c397091bac6a4a6dec4e45a7671fad4a11770. There seems to be some performance regression with the patch and hence recommended to have it reverted. Updates: #325 Change-Id: Id85d6203173a44fad6cf51d39b3e96f37afcec09 --- libglusterfs/src/glusterfs/iobuf.h | 111 ++++- libglusterfs/src/iobuf.c | 836 ++++++++++++++++++++++++++++++++++--- libglusterfs/src/mem-pool.c | 2 +- 3 files changed, 877 insertions(+), 72 deletions(-) (limited to 'libglusterfs') diff --git a/libglusterfs/src/glusterfs/iobuf.h b/libglusterfs/src/glusterfs/iobuf.h index 7516bc8034b..6de0f13ae36 100644 --- a/libglusterfs/src/glusterfs/iobuf.h +++ b/libglusterfs/src/glusterfs/iobuf.h @@ -17,38 +17,118 @@ #include #include +#define GF_VARIABLE_IOBUF_COUNT 32 + +#define GF_RDMA_DEVICE_COUNT 8 + +/* Lets try to define the new anonymous mapping + * flag, in case the system is still using the + * now deprecated MAP_ANON flag. + * + * Also, this should ideally be in a centralized/common + * header which can be used by other source files also. + */ +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + #define GF_ALIGN_BUF(ptr, bound) \ ((void *)((unsigned long)(ptr + bound - 1) & (unsigned long)(~(bound - 1)))) #define GF_IOBUF_ALIGN_SIZE 512 -#define GF_IOBUF_DEFAULT_PAGE_SIZE (128 * GF_UNIT_KB) - /* one allocatable unit for the consumers of the IOBUF API */ /* each unit hosts @page_size bytes of memory */ struct iobuf; +/* one region of memory mapped from the operating system */ +/* each region MMAPs @arena_size bytes of memory */ +/* each arena hosts @arena_size / @page_size IOBUFs */ +struct iobuf_arena; + /* expandable and contractable pool of memory, internally broken into arenas */ struct iobuf_pool; +struct iobuf_init_config { + size_t pagesize; + int32_t num_pages; +}; + struct iobuf { - gf_boolean_t stdalloc; /* indicates whether iobuf is allocated from - mem pool or standard alloc*/ - gf_lock_t lock; /* for ->ptr and ->ref */ - gf_atomic_t ref; /* 0 == passive, >0 == active */ + union { + struct list_head list; + struct { + struct iobuf *next; + struct iobuf *prev; + }; + }; + struct iobuf_arena *iobuf_arena; + + gf_lock_t lock; /* for ->ptr and ->ref */ + gf_atomic_t ref; /* 0 == passive, >0 == active */ void *ptr; /* usable memory region by the consumer */ - void *free_ptr; /* in case of stdalloc, this is the - one to be freed */ - size_t page_size; /* iobuf's page size */ - struct iobuf_pool *iobuf_pool; /* iobuf_pool iobuf is associated with */ + void *free_ptr; /* in case of stdalloc, this is the + one to be freed */ +}; + +struct iobuf_arena { + union { + struct list_head list; + struct { + struct iobuf_arena *next; + struct iobuf_arena *prev; + }; + }; + + struct list_head all_list; + size_t page_size; /* size of all iobufs in this arena */ + size_t arena_size; + /* this is equal to rounded_size * num_iobufs. + (rounded_size comes with gf_iobuf_get_pagesize().) 
*/ + size_t page_count; + + struct iobuf_pool *iobuf_pool; + + void *mem_base; + struct iobuf *iobufs; /* allocated iobufs list */ + + int active_cnt; + struct iobuf active; /* head node iobuf + (unused by itself) */ + int passive_cnt; + struct iobuf passive; /* head node iobuf + (unused by itself) */ + uint64_t alloc_cnt; /* total allocs in this pool */ + int max_active; /* max active buffers at a given time */ }; struct iobuf_pool { - gf_atomic_t mem_pool_hit; - gf_atomic_t mem_pool_miss; - gf_atomic_t active_cnt; + pthread_mutex_t mutex; + size_t arena_size; /* size of memory region in + arena */ + size_t default_page_size; /* default size of iobuf */ + + int arena_cnt; + struct list_head all_arenas; + struct list_head arenas[GF_VARIABLE_IOBUF_COUNT]; + /* array of arenas. Each element of the array is a list of arenas + holding iobufs of particular page_size */ + + struct list_head filled[GF_VARIABLE_IOBUF_COUNT]; + /* array of arenas without free iobufs */ + + struct list_head purge[GF_VARIABLE_IOBUF_COUNT]; + /* array of of arenas which can be purged */ + + uint64_t request_misses; /* mostly the requests for higher + value of iobufs */ + int rdma_device_count; + struct list_head *mr_list[GF_RDMA_DEVICE_COUNT]; + void *device[GF_RDMA_DEVICE_COUNT]; + int (*rdma_registration)(void **, void *); + int (*rdma_deregistration)(struct list_head **, struct iobuf_arena *); }; struct iobuf_pool * @@ -62,10 +142,13 @@ iobuf_unref(struct iobuf *iobuf); struct iobuf * iobuf_ref(struct iobuf *iobuf); void +iobuf_pool_destroy(struct iobuf_pool *iobuf_pool); +void iobuf_to_iovec(struct iobuf *iob, struct iovec *iov); #define iobuf_ptr(iob) ((iob)->ptr) -#define iobuf_pagesize(iob) (iob->page_size) +#define iobpool_default_pagesize(iobpool) ((iobpool)->default_page_size) +#define iobuf_pagesize(iob) (iob->iobuf_arena->page_size) struct iobref { gf_lock_t lock; diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c index aff0a3121ff..0e37c4fc6e2 100644 --- a/libglusterfs/src/iobuf.c +++ b/libglusterfs/src/iobuf.c @@ -12,80 +12,588 @@ #include "glusterfs/statedump.h" #include #include "glusterfs/libglusterfs-messages.h" -#include "glusterfs/atomic.h" + +/* + TODO: implement destroy margins and prefetching of arenas +*/ + +#define IOBUF_ARENA_MAX_INDEX \ + (sizeof(gf_iobuf_init_config) / (sizeof(struct iobuf_init_config))) + +/* Make sure this array is sorted based on pagesize */ +struct iobuf_init_config gf_iobuf_init_config[] = { + /* { pagesize, num_pages }, */ + {128, 1024}, {512, 512}, {2 * 1024, 512}, {8 * 1024, 128}, + {32 * 1024, 64}, {128 * 1024, 32}, {256 * 1024, 8}, {1 * 1024 * 1024, 2}, +}; + +static int +gf_iobuf_get_arena_index(const size_t page_size) +{ + int i; + + for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) { + if (page_size <= gf_iobuf_init_config[i].pagesize) + return i; + } + + return -1; +} + +static size_t +gf_iobuf_get_pagesize(const size_t page_size) +{ + int i; + size_t size = 0; + + for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) { + size = gf_iobuf_init_config[i].pagesize; + if (page_size <= size) + return size; + } + + return -1; +} + +void +__iobuf_arena_init_iobufs(struct iobuf_arena *iobuf_arena) +{ + int iobuf_cnt = 0; + struct iobuf *iobuf = NULL; + int offset = 0; + int i = 0; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); + + iobuf_cnt = iobuf_arena->page_count; + + iobuf_arena->iobufs = GF_CALLOC(sizeof(*iobuf), iobuf_cnt, + gf_common_mt_iobuf); + if (!iobuf_arena->iobufs) + return; + + iobuf = iobuf_arena->iobufs; + for (i = 0; i < iobuf_cnt; i++) { + 
INIT_LIST_HEAD(&iobuf->list); + LOCK_INIT(&iobuf->lock); + + iobuf->iobuf_arena = iobuf_arena; + + iobuf->ptr = iobuf_arena->mem_base + offset; + + list_add(&iobuf->list, &iobuf_arena->passive.list); + iobuf_arena->passive_cnt++; + + offset += iobuf_arena->page_size; + iobuf++; + } + +out: + return; +} + +void +__iobuf_arena_destroy_iobufs(struct iobuf_arena *iobuf_arena) +{ + int iobuf_cnt = 0; + struct iobuf *iobuf = NULL; + int i = 0; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); + + iobuf_cnt = iobuf_arena->page_count; + + if (!iobuf_arena->iobufs) { + gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, LG_MSG_IOBUFS_NOT_FOUND, + "iobufs not found"); + return; + } + + iobuf = iobuf_arena->iobufs; + for (i = 0; i < iobuf_cnt; i++) { + GF_ASSERT(GF_ATOMIC_GET(iobuf->ref) == 0); + + LOCK_DESTROY(&iobuf->lock); + list_del_init(&iobuf->list); + iobuf++; + } + + GF_FREE(iobuf_arena->iobufs); + +out: + return; +} + +void +__iobuf_arena_destroy(struct iobuf_pool *iobuf_pool, + struct iobuf_arena *iobuf_arena) +{ + GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); + + if (iobuf_pool->rdma_deregistration) + iobuf_pool->rdma_deregistration(iobuf_pool->mr_list, iobuf_arena); + + __iobuf_arena_destroy_iobufs(iobuf_arena); + + if (iobuf_arena->mem_base && iobuf_arena->mem_base != MAP_FAILED) + munmap(iobuf_arena->mem_base, iobuf_arena->arena_size); + + GF_FREE(iobuf_arena); +out: + return; +} + +struct iobuf_arena * +__iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size, + int32_t num_iobufs) +{ + struct iobuf_arena *iobuf_arena = NULL; + size_t rounded_size = 0; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); + + iobuf_arena = GF_CALLOC(sizeof(*iobuf_arena), 1, gf_common_mt_iobuf_arena); + if (!iobuf_arena) + goto err; + + INIT_LIST_HEAD(&iobuf_arena->list); + INIT_LIST_HEAD(&iobuf_arena->all_list); + INIT_LIST_HEAD(&iobuf_arena->active.list); + INIT_LIST_HEAD(&iobuf_arena->passive.list); + iobuf_arena->iobuf_pool = iobuf_pool; + + rounded_size = gf_iobuf_get_pagesize(page_size); + + iobuf_arena->page_size = rounded_size; + iobuf_arena->page_count = num_iobufs; + + iobuf_arena->arena_size = rounded_size * num_iobufs; + + iobuf_arena->mem_base = mmap(NULL, iobuf_arena->arena_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (iobuf_arena->mem_base == MAP_FAILED) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_MAPPING_FAILED, + "mapping failed"); + goto err; + } + + if (iobuf_pool->rdma_registration) { + iobuf_pool->rdma_registration(iobuf_pool->device, iobuf_arena); + } + + list_add_tail(&iobuf_arena->all_list, &iobuf_pool->all_arenas); + + __iobuf_arena_init_iobufs(iobuf_arena); + if (!iobuf_arena->iobufs) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_IOBUF_FAILED, + "init failed"); + goto err; + } + + iobuf_pool->arena_cnt++; + + return iobuf_arena; + +err: + __iobuf_arena_destroy(iobuf_pool, iobuf_arena); + +out: + return NULL; +} + +static struct iobuf_arena * +__iobuf_arena_unprune(struct iobuf_pool *iobuf_pool, const size_t page_size, + const int index) +{ + struct iobuf_arena *iobuf_arena = NULL; + struct iobuf_arena *tmp = NULL; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); + + list_for_each_entry(tmp, &iobuf_pool->purge[index], list) + { + list_del_init(&tmp->list); + iobuf_arena = tmp; + break; + } +out: + return iobuf_arena; +} + +static struct iobuf_arena * +__iobuf_pool_add_arena(struct iobuf_pool *iobuf_pool, const size_t page_size, + const int32_t num_pages, const int index) +{ + struct iobuf_arena *iobuf_arena = NULL; + + 
iobuf_arena = __iobuf_arena_unprune(iobuf_pool, page_size, index); + + if (!iobuf_arena) { + iobuf_arena = __iobuf_arena_alloc(iobuf_pool, page_size, num_pages); + if (!iobuf_arena) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, + "arena not found"); + return NULL; + } + } + list_add(&iobuf_arena->list, &iobuf_pool->arenas[index]); + + return iobuf_arena; +} + +/* This function destroys all the iobufs and the iobuf_pool */ +void +iobuf_pool_destroy(struct iobuf_pool *iobuf_pool) +{ + struct iobuf_arena *iobuf_arena = NULL; + struct iobuf_arena *tmp = NULL; + int i = 0; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); + + pthread_mutex_lock(&iobuf_pool->mutex); + { + for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) { + list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->arenas[i], + list) + { + list_del_init(&iobuf_arena->list); + iobuf_pool->arena_cnt--; + + __iobuf_arena_destroy(iobuf_pool, iobuf_arena); + } + list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->purge[i], + list) + { + list_del_init(&iobuf_arena->list); + iobuf_pool->arena_cnt--; + __iobuf_arena_destroy(iobuf_pool, iobuf_arena); + } + /* If there are no iobuf leaks, there should be no + * arenas in the filled list. If at all there are any + * arenas in the filled list, the below function will + * assert. + */ + list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->filled[i], + list) + { + list_del_init(&iobuf_arena->list); + iobuf_pool->arena_cnt--; + __iobuf_arena_destroy(iobuf_pool, iobuf_arena); + } + /* If there are no iobuf leaks, there shoould be + * no standard allocated arenas, iobuf_put will free + * such arenas. + * TODO: Free the stdalloc arenas forcefully if present? + */ + } + } + pthread_mutex_unlock(&iobuf_pool->mutex); + + pthread_mutex_destroy(&iobuf_pool->mutex); + + GF_FREE(iobuf_pool); + +out: + return; +} + +static void +iobuf_create_stdalloc_arena(struct iobuf_pool *iobuf_pool) +{ + struct iobuf_arena *iobuf_arena = NULL; + + /* No locking required here as its called only once during init */ + iobuf_arena = GF_CALLOC(sizeof(*iobuf_arena), 1, gf_common_mt_iobuf_arena); + if (!iobuf_arena) + goto err; + + INIT_LIST_HEAD(&iobuf_arena->list); + INIT_LIST_HEAD(&iobuf_arena->active.list); + INIT_LIST_HEAD(&iobuf_arena->passive.list); + + iobuf_arena->iobuf_pool = iobuf_pool; + + iobuf_arena->page_size = 0x7fffffff; + + list_add_tail(&iobuf_arena->list, + &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX]); + +err: + return; +} struct iobuf_pool * iobuf_pool_new(void) { struct iobuf_pool *iobuf_pool = NULL; + int i = 0; + size_t page_size = 0; + size_t arena_size = 0; + int32_t num_pages = 0; + int index; iobuf_pool = GF_CALLOC(sizeof(*iobuf_pool), 1, gf_common_mt_iobuf_pool); if (!iobuf_pool) goto out; + INIT_LIST_HEAD(&iobuf_pool->all_arenas); + pthread_mutex_init(&iobuf_pool->mutex, NULL); + for (i = 0; i <= IOBUF_ARENA_MAX_INDEX; i++) { + INIT_LIST_HEAD(&iobuf_pool->arenas[i]); + INIT_LIST_HEAD(&iobuf_pool->filled[i]); + INIT_LIST_HEAD(&iobuf_pool->purge[i]); + } + + iobuf_pool->default_page_size = 128 * GF_UNIT_KB; + + iobuf_pool->rdma_registration = NULL; + iobuf_pool->rdma_deregistration = NULL; + + for (i = 0; i < GF_RDMA_DEVICE_COUNT; i++) { + iobuf_pool->device[i] = NULL; + iobuf_pool->mr_list[i] = NULL; + } + + pthread_mutex_lock(&iobuf_pool->mutex); + { + for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) { + page_size = gf_iobuf_init_config[i].pagesize; + num_pages = gf_iobuf_init_config[i].num_pages; + + index = gf_iobuf_get_arena_index(page_size); + if (index == -1) { + 
pthread_mutex_unlock(&iobuf_pool->mutex); + gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, + "page_size (%zu) of iobufs in arena being added is " + "greater than max available", + page_size); + return NULL; + } + + __iobuf_pool_add_arena(iobuf_pool, page_size, num_pages, index); + + arena_size += page_size * num_pages; + } + } + pthread_mutex_unlock(&iobuf_pool->mutex); - GF_ATOMIC_INIT(iobuf_pool->mem_pool_hit, 0); - GF_ATOMIC_INIT(iobuf_pool->mem_pool_miss, 0); - GF_ATOMIC_INIT(iobuf_pool->active_cnt, 0); + /* Need an arena to handle all the bigger iobuf requests */ + iobuf_create_stdalloc_arena(iobuf_pool); + iobuf_pool->arena_size = arena_size; out: + return iobuf_pool; } +static void +__iobuf_arena_prune(struct iobuf_pool *iobuf_pool, + struct iobuf_arena *iobuf_arena, const int index) +{ + /* code flow comes here only if the arena is in purge list and we can + * free the arena only if we have at least one arena in 'arenas' list + * (ie, at least few iobufs free in arena), that way, there won't + * be spurious mmap/unmap of buffers + */ + if (list_empty(&iobuf_pool->arenas[index])) + goto out; + + /* All cases matched, destroy */ + list_del_init(&iobuf_arena->list); + list_del_init(&iobuf_arena->all_list); + iobuf_pool->arena_cnt--; + + __iobuf_arena_destroy(iobuf_pool, iobuf_arena); + +out: + return; +} + void -iobuf_pool_destroy(struct iobuf_pool *iobuf_pool) +iobuf_pool_prune(struct iobuf_pool *iobuf_pool) { - if (!iobuf_pool) - return; + struct iobuf_arena *iobuf_arena = NULL; + struct iobuf_arena *tmp = NULL; + int i = 0; - if (GF_ATOMIC_GET(iobuf_pool->active_cnt) != 0) - gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, LG_MSG_IOBUFS_NOT_FOUND, - "iobuf_pool_destroy called, but there" - " are unfreed active iobufs:%" PRId64, - GF_ATOMIC_GET(iobuf_pool->active_cnt)); + GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); - GF_FREE(iobuf_pool); + pthread_mutex_lock(&iobuf_pool->mutex); + { + for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) { + if (list_empty(&iobuf_pool->arenas[i])) { + continue; + } + + list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->purge[i], + list) + { + __iobuf_arena_prune(iobuf_pool, iobuf_arena, i); + } + } + } + pthread_mutex_unlock(&iobuf_pool->mutex); + +out: return; } +/* Always called under the iobuf_pool mutex lock */ +static struct iobuf_arena * +__iobuf_select_arena(struct iobuf_pool *iobuf_pool, const size_t page_size, + const int index) +{ + struct iobuf_arena *iobuf_arena = NULL; + struct iobuf_arena *trav = NULL; + + /* look for unused iobuf from the head-most arena */ + list_for_each_entry(trav, &iobuf_pool->arenas[index], list) + { + if (trav->passive_cnt) { + iobuf_arena = trav; + break; + } + } + + if (!iobuf_arena) { + /* all arenas were full, find the right count to add */ + iobuf_arena = __iobuf_pool_add_arena( + iobuf_pool, page_size, gf_iobuf_init_config[index].num_pages, + index); + } + + return iobuf_arena; +} + +/* Always called under the iobuf_pool mutex lock */ +static struct iobuf * +__iobuf_get(struct iobuf_pool *iobuf_pool, const size_t page_size, + const int index) +{ + struct iobuf *iobuf = NULL; + struct iobuf_arena *iobuf_arena = NULL; + + /* most eligible arena for picking an iobuf */ + iobuf_arena = __iobuf_select_arena(iobuf_pool, page_size, index); + if (!iobuf_arena) + return NULL; + + list_for_each_entry(iobuf, &iobuf_arena->passive.list, list) break; + + list_del(&iobuf->list); + iobuf_arena->passive_cnt--; + + list_add(&iobuf->list, &iobuf_arena->active.list); + iobuf_arena->active_cnt++; + + /* no resetting 
requied for this element */ + iobuf_arena->alloc_cnt++; + + if (iobuf_arena->max_active < iobuf_arena->active_cnt) + iobuf_arena->max_active = iobuf_arena->active_cnt; + + if (iobuf_arena->passive_cnt == 0) { + list_del(&iobuf_arena->list); + list_add(&iobuf_arena->list, &iobuf_pool->filled[index]); + } + + return iobuf; +} + struct iobuf * -iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size) +iobuf_get_from_stdalloc(struct iobuf_pool *iobuf_pool, size_t page_size) { struct iobuf *iobuf = NULL; - gf_boolean_t hit = _gf_false; + struct iobuf_arena *iobuf_arena = NULL; + struct iobuf_arena *trav = NULL; + int ret = -1; - if (page_size == 0) { - page_size = GF_IOBUF_DEFAULT_PAGE_SIZE; + /* The first arena in the 'MAX-INDEX' will always be used for misc */ + list_for_each_entry(trav, &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX], list) + { + iobuf_arena = trav; + break; } - iobuf = mem_pool_get0(sizeof(struct iobuf), &hit); + iobuf = GF_CALLOC(1, sizeof(*iobuf), gf_common_mt_iobuf); if (!iobuf) goto out; - iobuf->free_ptr = mem_pool_get(page_size, &hit); - if (!iobuf->free_ptr) { - iobuf->free_ptr = GF_MALLOC(page_size, gf_common_mt_char); - iobuf->stdalloc = _gf_true; - } - if (!iobuf->free_ptr) { - mem_put(iobuf); - iobuf = NULL; + /* 4096 is the alignment */ + iobuf->free_ptr = GF_CALLOC(1, ((page_size + GF_IOBUF_ALIGN_SIZE) - 1), + gf_common_mt_char); + if (!iobuf->free_ptr) goto out; - } - if (hit == _gf_true) - GF_ATOMIC_INC(iobuf_pool->mem_pool_hit); - else - GF_ATOMIC_INC(iobuf_pool->mem_pool_miss); - iobuf->ptr = iobuf->free_ptr; + iobuf->ptr = GF_ALIGN_BUF(iobuf->free_ptr, GF_IOBUF_ALIGN_SIZE); + iobuf->iobuf_arena = iobuf_arena; LOCK_INIT(&iobuf->lock); - iobuf->page_size = page_size; - iobuf->iobuf_pool = iobuf_pool; - /* Hold a ref because you are allocating and using it */ - iobuf_ref(iobuf); - GF_ATOMIC_INC(iobuf_pool->active_cnt); + GF_ATOMIC_INIT(iobuf->ref, 1); + + ret = 0; out: + if (ret && iobuf) { + GF_FREE(iobuf->free_ptr); + GF_FREE(iobuf); + iobuf = NULL; + } + + return iobuf; +} + +struct iobuf * +iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size) +{ + struct iobuf *iobuf = NULL; + size_t rounded_size = 0; + int index = 0; + + if (page_size == 0) { + page_size = iobuf_pool->default_page_size; + } + + rounded_size = gf_iobuf_get_pagesize(page_size); + if (rounded_size == -1) { + /* make sure to provide the requested buffer with standard + memory allocations */ + iobuf = iobuf_get_from_stdalloc(iobuf_pool, page_size); + + gf_msg_debug("iobuf", 0, + "request for iobuf of size %zu " + "is serviced using standard calloc() (%p) as it " + "exceeds the maximum available buffer size", + page_size, iobuf); + + iobuf_pool->request_misses++; + return iobuf; + } + + index = gf_iobuf_get_arena_index(page_size); + if (index == -1) { + gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, + "page_size (%zu) of iobufs in arena being added is " + "greater than max available", + page_size); + return NULL; + } + + pthread_mutex_lock(&iobuf_pool->mutex); + { + iobuf = __iobuf_get(iobuf_pool, rounded_size, index); + if (!iobuf) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND, + "iobuf not found"); + goto unlock; + } + + iobuf_ref(iobuf); + } +unlock: + pthread_mutex_unlock(&iobuf_pool->mutex); + return iobuf; } @@ -99,13 +607,23 @@ iobuf_get_page_aligned(struct iobuf_pool *iobuf_pool, size_t page_size, req_size = page_size; if (req_size == 0) { - req_size = GF_IOBUF_DEFAULT_PAGE_SIZE; + req_size = iobuf_pool->default_page_size; } iobuf = 
iobuf_get2(iobuf_pool, req_size + align_size); if (!iobuf) return NULL; - + /* If std allocation was used, then free_ptr will be non-NULL. In this + * case, we do not want to modify the original free_ptr. + * On the other hand, if the buf was gotten through the available + * arenas, then we use iobuf->free_ptr to store the original + * pointer to the offset into the mmap'd block of memory and in turn + * reuse iobuf->ptr to hold the page-aligned address. And finally, in + * iobuf_put(), we copy iobuf->free_ptr into iobuf->ptr - back to where + * it was originally when __iobuf_get() returned this iobuf. + */ + if (!iobuf->free_ptr) + iobuf->free_ptr = iobuf->ptr; iobuf->ptr = GF_ALIGN_BUF(iobuf->ptr, align_size); return iobuf; @@ -114,22 +632,118 @@ iobuf_get_page_aligned(struct iobuf_pool *iobuf_pool, size_t page_size, struct iobuf * iobuf_get(struct iobuf_pool *iobuf_pool) { - return iobuf_get2(iobuf_pool, GF_IOBUF_DEFAULT_PAGE_SIZE); + struct iobuf *iobuf = NULL; + int index = 0; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); + + index = gf_iobuf_get_arena_index(iobuf_pool->default_page_size); + if (index == -1) { + gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED, + "page_size (%zu) of iobufs in arena being added is " + "greater than max available", + iobuf_pool->default_page_size); + return NULL; + } + + pthread_mutex_lock(&iobuf_pool->mutex); + { + iobuf = __iobuf_get(iobuf_pool, iobuf_pool->default_page_size, index); + if (!iobuf) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND, + "iobuf not found"); + goto unlock; + } + + iobuf_ref(iobuf); + } +unlock: + pthread_mutex_unlock(&iobuf_pool->mutex); + +out: + return iobuf; +} + +void +__iobuf_put(struct iobuf *iobuf, struct iobuf_arena *iobuf_arena) +{ + struct iobuf_pool *iobuf_pool = NULL; + int index = 0; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); + GF_VALIDATE_OR_GOTO("iobuf", iobuf, out); + + iobuf_pool = iobuf_arena->iobuf_pool; + + index = gf_iobuf_get_arena_index(iobuf_arena->page_size); + if (index == -1) { + gf_msg_debug("iobuf", 0, + "freeing the iobuf (%p) " + "allocated with standard calloc()", + iobuf); + + /* free up properly without bothering about lists and all */ + LOCK_DESTROY(&iobuf->lock); + GF_FREE(iobuf->free_ptr); + GF_FREE(iobuf); + return; + } + + if (iobuf_arena->passive_cnt == 0) { + list_del(&iobuf_arena->list); + list_add_tail(&iobuf_arena->list, &iobuf_pool->arenas[index]); + } + + list_del_init(&iobuf->list); + iobuf_arena->active_cnt--; + + if (iobuf->free_ptr) { + iobuf->ptr = iobuf->free_ptr; + iobuf->free_ptr = NULL; + } + + list_add(&iobuf->list, &iobuf_arena->passive.list); + iobuf_arena->passive_cnt++; + + if (iobuf_arena->active_cnt == 0) { + list_del(&iobuf_arena->list); + list_add_tail(&iobuf_arena->list, &iobuf_pool->purge[index]); + GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); + __iobuf_arena_prune(iobuf_pool, iobuf_arena, index); + } +out: + return; } void iobuf_put(struct iobuf *iobuf) { - LOCK_DESTROY(&iobuf->lock); + struct iobuf_arena *iobuf_arena = NULL; + struct iobuf_pool *iobuf_pool = NULL; - if (iobuf->stdalloc) - GF_FREE(iobuf->free_ptr); - else - mem_put(iobuf->free_ptr); + GF_VALIDATE_OR_GOTO("iobuf", iobuf, out); + + iobuf_arena = iobuf->iobuf_arena; + if (!iobuf_arena) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, + "arena not found"); + return; + } + + iobuf_pool = iobuf_arena->iobuf_pool; + if (!iobuf_pool) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, + "iobuf pool not found"); + return; + } - 
GF_ATOMIC_DEC(iobuf->iobuf_pool->active_cnt); - mem_put(iobuf); + pthread_mutex_lock(&iobuf_pool->mutex); + { + __iobuf_put(iobuf, iobuf_arena); + } + pthread_mutex_unlock(&iobuf_pool->mutex); +out: return; } @@ -353,10 +967,25 @@ out: size_t iobuf_size(struct iobuf *iobuf) { - if (!iobuf) - return 0; + size_t size = 0; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf, out); - return iobuf->page_size; + if (!iobuf->iobuf_arena) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, + "arena not found"); + goto out; + } + + if (!iobuf->iobuf_arena->iobuf_pool) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, + "pool not found"); + goto out; + } + + size = iobuf->iobuf_arena->page_size; +out: + return size; } size_t @@ -380,21 +1009,114 @@ out: return size; } +void +iobuf_info_dump(struct iobuf *iobuf, const char *key_prefix) +{ + char key[GF_DUMP_MAX_BUF_LEN]; + struct iobuf my_iobuf; + int ret = 0; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf, out); + + ret = TRY_LOCK(&iobuf->lock); + if (ret) { + return; + } + memcpy(&my_iobuf, iobuf, sizeof(my_iobuf)); + UNLOCK(&iobuf->lock); + + gf_proc_dump_build_key(key, key_prefix, "ref"); + gf_proc_dump_write(key, "%" GF_PRI_ATOMIC, GF_ATOMIC_GET(my_iobuf.ref)); + gf_proc_dump_build_key(key, key_prefix, "ptr"); + gf_proc_dump_write(key, "%p", my_iobuf.ptr); + +out: + return; +} + +void +iobuf_arena_info_dump(struct iobuf_arena *iobuf_arena, const char *key_prefix) +{ + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 1; + struct iobuf *trav; + + GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out); + + gf_proc_dump_build_key(key, key_prefix, "mem_base"); + gf_proc_dump_write(key, "%p", iobuf_arena->mem_base); + gf_proc_dump_build_key(key, key_prefix, "active_cnt"); + gf_proc_dump_write(key, "%d", iobuf_arena->active_cnt); + gf_proc_dump_build_key(key, key_prefix, "passive_cnt"); + gf_proc_dump_write(key, "%d", iobuf_arena->passive_cnt); + gf_proc_dump_build_key(key, key_prefix, "alloc_cnt"); + gf_proc_dump_write(key, "%" PRIu64, iobuf_arena->alloc_cnt); + gf_proc_dump_build_key(key, key_prefix, "max_active"); + gf_proc_dump_write(key, "%d", iobuf_arena->max_active); + gf_proc_dump_build_key(key, key_prefix, "page_size"); + gf_proc_dump_write(key, "%" GF_PRI_SIZET, iobuf_arena->page_size); + list_for_each_entry(trav, &iobuf_arena->active.list, list) + { + gf_proc_dump_build_key(key, key_prefix, "active_iobuf.%d", i++); + gf_proc_dump_add_section("%s", key); + iobuf_info_dump(trav, key); + } + +out: + return; +} + void iobuf_stats_dump(struct iobuf_pool *iobuf_pool) { + char msg[1024]; + struct iobuf_arena *trav = NULL; + int i = 1; + int j = 0; + int ret = -1; + GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out); + ret = pthread_mutex_trylock(&iobuf_pool->mutex); + + if (ret) { + return; + } gf_proc_dump_add_section("iobuf.global"); gf_proc_dump_write("iobuf_pool", "%p", iobuf_pool); - gf_proc_dump_write("iobuf_pool.default_page_size", "%llu", - GF_IOBUF_DEFAULT_PAGE_SIZE); - gf_proc_dump_write("iobuf_pool.request_hits", "%" PRId64, - GF_ATOMIC_GET(iobuf_pool->mem_pool_hit)); + gf_proc_dump_write("iobuf_pool.default_page_size", "%" GF_PRI_SIZET, + iobuf_pool->default_page_size); + gf_proc_dump_write("iobuf_pool.arena_size", "%" GF_PRI_SIZET, + iobuf_pool->arena_size); + gf_proc_dump_write("iobuf_pool.arena_cnt", "%d", iobuf_pool->arena_cnt); gf_proc_dump_write("iobuf_pool.request_misses", "%" PRId64, - GF_ATOMIC_GET(iobuf_pool->mem_pool_miss)); - gf_proc_dump_write("iobuf_pool.active_cnt", "%" PRId64, - GF_ATOMIC_GET(iobuf_pool->active_cnt)); + 
iobuf_pool->request_misses); + + for (j = 0; j < IOBUF_ARENA_MAX_INDEX; j++) { + list_for_each_entry(trav, &iobuf_pool->arenas[j], list) + { + snprintf(msg, sizeof(msg), "arena.%d", i); + gf_proc_dump_add_section("%s", msg); + iobuf_arena_info_dump(trav, msg); + i++; + } + list_for_each_entry(trav, &iobuf_pool->purge[j], list) + { + snprintf(msg, sizeof(msg), "purge.%d", i); + gf_proc_dump_add_section("%s", msg); + iobuf_arena_info_dump(trav, msg); + i++; + } + list_for_each_entry(trav, &iobuf_pool->filled[j], list) + { + snprintf(msg, sizeof(msg), "filled.%d", i); + gf_proc_dump_add_section("%s", msg); + iobuf_arena_info_dump(trav, msg); + i++; + } + } + + pthread_mutex_unlock(&iobuf_pool->mutex); out: return; diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c index 4f46b1e3ca0..81badc0ba0b 100644 --- a/libglusterfs/src/mem-pool.c +++ b/libglusterfs/src/mem-pool.c @@ -920,7 +920,7 @@ mem_pool_get(unsigned long sizeof_type, gf_boolean_t *hit) sizeof_type |= (1 << POOL_SMALLEST) - 1; power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1; if (power > POOL_LARGEST) { - gf_msg_callingfn("mem-pool", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, + gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "invalid argument"); return NULL; } -- cgit
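
The revert above restores the arena-based iobuf API declared in iobuf.h. As a reviewing aid (not part of the patch), here is a minimal consumer-side sketch of how that API is typically driven: it only uses the public calls shown in the restored header and sources (iobuf_pool_new, iobuf_get2, iobuf_ptr, iobuf_pagesize, iobuf_unref, iobuf_pool_destroy); the 128KB request size is an arbitrary example value, and error handling is reduced to the bare minimum.

```c
/*
 * Illustrative sketch only -- not part of the revert. Shows the typical
 * get/use/release cycle against the restored arena-based iobuf pool.
 */
#include <string.h>
#include "glusterfs/iobuf.h"

static int
iobuf_usage_example(void)
{
    struct iobuf_pool *pool = NULL;
    struct iobuf *iob = NULL;

    /* Pre-allocates one arena per configured page size (128B .. 1MB). */
    pool = iobuf_pool_new();
    if (!pool)
        return -1;

    /* Request a 128KB buffer. iobuf_get2() rounds the size up to the
     * nearest configured arena page size; requests larger than the
     * biggest arena fall back to a standard allocation internally. */
    iob = iobuf_get2(pool, 128 * 1024);
    if (!iob) {
        iobuf_pool_destroy(pool);
        return -1;
    }

    /* iobuf_ptr() is the usable memory region; iobuf_pagesize() is the
     * rounded arena page size actually backing this iobuf. */
    memset(iobuf_ptr(iob), 0, iobuf_pagesize(iob));

    /* iobuf_get2() returns the buffer with one reference held, so a
     * single unref returns it to the arena's passive list. */
    iobuf_unref(iob);

    /* Tears down all arenas; logs/asserts if active iobufs leaked. */
    iobuf_pool_destroy(pool);

    return 0;
}
```

In the restored design the consumer never frees an iobuf directly: dropping the last reference moves it back to its arena's passive list, and fully idle arenas are pruned or reused on later requests.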