Diffstat (limited to 'libglusterfs/src/mem-pool.c')
-rw-r--r--  libglusterfs/src/mem-pool.c  |  687
1 file changed, 338 insertions(+), 349 deletions(-)
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index 4ef62b8da48..2d5a12b0a00 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -8,14 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include "mem-pool.h"
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h" // for GF_ASSERT, gf_thread_cr...
+#include "glusterfs/globals.h" // for xlator_t, THIS
#include <stdlib.h>
#include <stdarg.h>
#include "unittest/unittest.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/libglusterfs-messages.h"
void
gf_mem_acct_enable_set(void *data)
@@ -35,61 +35,101 @@ gf_mem_acct_enable_set(void *data)
return;
}
-int
-gf_mem_set_acct_info(xlator_t *xl, char **alloc_ptr, size_t size, uint32_t type,
- const char *typestr)
+static void *
+gf_mem_header_prepare(struct mem_header *header, size_t size)
{
- void *ptr = NULL;
- struct mem_header *header = NULL;
+ void *ptr;
- if (!alloc_ptr)
- return -1;
+ header->size = size;
- ptr = *alloc_ptr;
+ ptr = header + 1;
- GF_ASSERT(xl != NULL);
+ /* data follows in this gap of 'size' bytes */
+ *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
- GF_ASSERT(xl->mem_acct != NULL);
+ return ptr;
+}
- GF_ASSERT(type <= xl->mem_acct->num_types);
+static void *
+gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size, uint32_t type, const char *typestr)
+{
+ struct mem_acct_rec *rec = NULL;
+ bool new_ref = false;
- LOCK(&xl->mem_acct->rec[type].lock);
- {
- if (!xl->mem_acct->rec[type].typestr)
- xl->mem_acct->rec[type].typestr = typestr;
- xl->mem_acct->rec[type].size += size;
- xl->mem_acct->rec[type].num_allocs++;
- xl->mem_acct->rec[type].total_allocs++;
- xl->mem_acct->rec[type].max_size = max(xl->mem_acct->rec[type].max_size,
- xl->mem_acct->rec[type].size);
- xl->mem_acct->rec[type].max_num_allocs = max(
- xl->mem_acct->rec[type].max_num_allocs,
- xl->mem_acct->rec[type].num_allocs);
- }
- UNLOCK(&xl->mem_acct->rec[type].lock);
+ if (mem_acct != NULL) {
+ GF_ASSERT(type <= mem_acct->num_types);
- GF_ATOMIC_INC(xl->mem_acct->refcnt);
+ rec = &mem_acct->rec[type];
+ LOCK(&rec->lock);
+ {
+ if (!rec->typestr) {
+ rec->typestr = typestr;
+ }
+ rec->size += size;
+ new_ref = (rec->num_allocs == 0);
+ rec->num_allocs++;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+ rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs);
+
+#ifdef DEBUG
+ list_add(&header->acct_list, &rec->obj_list);
+#endif
+ }
+ UNLOCK(&rec->lock);
+
+ /* We only take a reference for each memory type used, not for each
+ * allocation. This minimizes the use of atomic operations. */
+ if (new_ref) {
+ GF_ATOMIC_INC(mem_acct->refcnt);
+ }
+ }
- header = (struct mem_header *)ptr;
header->type = type;
- header->size = size;
- header->mem_acct = xl->mem_acct;
+ header->mem_acct = mem_acct;
header->magic = GF_MEM_HEADER_MAGIC;
+ return gf_mem_header_prepare(header, size);
+}
+
+static void *
+gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size)
+{
+ struct mem_acct_rec *rec = NULL;
+
+ if (mem_acct != NULL) {
+ rec = &mem_acct->rec[header->type];
+ LOCK(&rec->lock);
+ {
+ rec->size += size - header->size;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+
#ifdef DEBUG
- INIT_LIST_HEAD(&header->acct_list);
- LOCK(&xl->mem_acct->rec[type].lock);
- {
- list_add(&header->acct_list, &(xl->mem_acct->rec[type].obj_list));
- }
- UNLOCK(&xl->mem_acct->rec[type].lock);
+ /* The old 'header' was already present in 'obj_list', but
+ * realloc() could have changed its address. We need to remove
+ * the old item from the list and add the new one. list_move()
+ * works here because it never dereferences the stale pointers
+ * to the old location that are still stored in the list; it
+ * simply overwrites them. */
+ list_move(&header->acct_list, &rec->obj_list);
#endif
- ptr += sizeof(struct mem_header);
- /* data follows in this gap of 'size' bytes */
- *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
+ }
+ UNLOCK(&rec->lock);
+ }
+
+ return gf_mem_header_prepare(header, size);
+}
- *alloc_ptr = ptr;
- return 0;
+static bool
+gf_mem_acct_enabled(void)
+{
+ xlator_t *x = THIS;
+ /* Low-level __gf_xxx() may be called
+ * before ctx is initialized. */
+ return x->ctx && x->ctx->mem_acct_enable;
}
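
For orientation, the block layout that gf_mem_set_acct_info() and gf_mem_header_prepare() produce looks like this (an illustrative diagram added for this writeup, not part of the patch):

/*
 *  +--------------------+----------------------------+----------------------+
 *  | struct mem_header  | 'size' bytes of user data  | GF_MEM_TRAILER_MAGIC |
 *  | (magic, type, size,| (the pointer returned to   | (uint32_t written at |
 *  |  mem_acct, ...)    |  the caller starts here)   |  ptr + size)         |
 *  +--------------------+----------------------------+----------------------+
 *
 * The free path steps back GF_MEM_HEADER_SIZE bytes from the caller's
 * pointer to recover the header, and the trailer magic makes writes past
 * the requested size detectable.
 */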
void *
@@ -97,10 +137,10 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
{
size_t tot_size = 0;
size_t req_size = 0;
- char *ptr = NULL;
+ void *ptr = NULL;
xlator_t *xl = NULL;
- if (!THIS->ctx->mem_acct_enable)
+ if (!gf_mem_acct_enabled())
return CALLOC(nmemb, size);
xl = THIS;
@@ -114,19 +154,18 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
gf_msg_nomem("", GF_LOG_ALERT, tot_size);
return NULL;
}
- gf_mem_set_acct_info(xl, &ptr, req_size, type, typestr);
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr);
}
void *
__gf_malloc(size_t size, uint32_t type, const char *typestr)
{
size_t tot_size = 0;
- char *ptr = NULL;
+ void *ptr = NULL;
xlator_t *xl = NULL;
- if (!THIS->ctx->mem_acct_enable)
+ if (!gf_mem_acct_enabled())
return MALLOC(size);
xl = THIS;
@@ -138,84 +177,32 @@ __gf_malloc(size_t size, uint32_t type, const char *typestr)
gf_msg_nomem("", GF_LOG_ALERT, tot_size);
return NULL;
}
- gf_mem_set_acct_info(xl, &ptr, size, type, typestr);
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr);
}
void *
__gf_realloc(void *ptr, size_t size)
{
size_t tot_size = 0;
- char *new_ptr;
- struct mem_header *old_header = NULL;
- struct mem_header *new_header = NULL;
- struct mem_header tmp_header;
+ struct mem_header *header = NULL;
- if (!THIS->ctx->mem_acct_enable)
+ if (!gf_mem_acct_enabled())
return REALLOC(ptr, size);
REQUIRE(NULL != ptr);
- old_header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
- GF_ASSERT(old_header->magic == GF_MEM_HEADER_MAGIC);
- tmp_header = *old_header;
-
-#ifdef DEBUG
- int type = 0;
- size_t copy_size = 0;
-
- /* Making these changes for realloc is not straightforward. So
- * I am simulating realloc using calloc and free
- */
-
- type = tmp_header.type;
- new_ptr = __gf_calloc(1, size, type,
- tmp_header.mem_acct->rec[type].typestr);
- if (new_ptr) {
- copy_size = (size > tmp_header.size) ? tmp_header.size : size;
- memcpy(new_ptr, ptr, copy_size);
- __gf_free(ptr);
- }
-
- /* This is not quite what the man page says should happen */
- return new_ptr;
-#endif
+ header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
+ GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC);
tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
- new_ptr = realloc(old_header, tot_size);
- if (!new_ptr) {
+ header = realloc(header, tot_size);
+ if (!header) {
gf_msg_nomem("", GF_LOG_ALERT, tot_size);
return NULL;
}
- /*
- * We used to pass (char **)&ptr as the second
- * argument after the value of realloc was saved
- * in ptr, but the compiler warnings complained
- * about the casting to and forth from void ** to
- * char **.
- * TBD: it would be nice to adjust the memory accounting info here,
- * but calling gf_mem_set_acct_info here is wrong because it bumps
- * up counts as though this is a new allocation - which it's not.
- * The consequence of doing nothing here is only that the sizes will be
- * wrong, but at least the counts won't be.
- uint32_t type = 0;
- xlator_t *xl = NULL;
- type = header->type;
- xl = (xlator_t *) header->xlator;
- gf_mem_set_acct_info (xl, &new_ptr, size, type, NULL);
- */
-
- new_header = (struct mem_header *)new_ptr;
- *new_header = tmp_header;
- new_header->size = size;
-
- new_ptr += sizeof(struct mem_header);
- /* data follows in this gap of 'size' bytes */
- *(uint32_t *)(new_ptr + size) = GF_MEM_TRAILER_MAGIC;
-
- return (void *)new_ptr;
+ return gf_mem_update_acct_info(header->mem_acct, header, size);
}
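
To make the delta accounting in gf_mem_update_acct_info() concrete, a worked example with hypothetical numbers:

/*
 * Suppose rec->size == 1000 and the reallocated block has header->size == 100:
 *
 *   __gf_realloc(ptr, 300):  rec->size += 300 - 100  ->  1200
 *   __gf_realloc(ptr, 50):   rec->size += 50 - 300   ->  950
 *
 * total_allocs grows on every call, while num_allocs stays unchanged: no new
 * object came into existence, an existing one merely changed size.
 */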
int
@@ -321,8 +308,9 @@ __gf_free(void *free_ptr)
void *ptr = NULL;
struct mem_acct *mem_acct;
struct mem_header *header = NULL;
+ bool last_ref = false;
- if (!THIS->ctx->mem_acct_enable) {
+ if (!gf_mem_acct_enabled()) {
FREE(free_ptr);
return;
}
@@ -348,21 +336,22 @@ __gf_free(void *free_ptr)
LOCK(&mem_acct->rec[header->type].lock);
{
- GF_ASSERT(mem_acct->rec[header->type].size >= header->size);
mem_acct->rec[header->type].size -= header->size;
mem_acct->rec[header->type].num_allocs--;
/* If all the instances are freed up then ensure typestr is set
* to NULL */
- if (!mem_acct->rec[header->type].num_allocs)
+ if (!mem_acct->rec[header->type].num_allocs) {
+ last_ref = true;
mem_acct->rec[header->type].typestr = NULL;
+ }
#ifdef DEBUG
list_del(&header->acct_list);
#endif
}
UNLOCK(&mem_acct->rec[header->type].lock);
- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
- FREE(mem_acct);
+ if (last_ref) {
+ xlator_mem_acct_unref(mem_acct);
}
free:
@@ -373,11 +362,30 @@ free:
FREE(ptr);
}
-#define POOL_SMALLEST 7 /* i.e. 128 */
-#define POOL_LARGEST 20 /* i.e. 1048576 */
-#define NPOOLS (POOL_LARGEST - POOL_SMALLEST + 1)
+#if defined(GF_DISABLE_MEMPOOL)
+
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
+{
+ struct mem_pool *new;
+
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
+
+ new->sizeof_type = sizeof_type;
+ return new;
+}
+
+void
+mem_pool_destroy(struct mem_pool *pool)
+{
+ GF_FREE(pool);
+}
+
+#else /* !GF_DISABLE_MEMPOOL */
-static pthread_key_t pool_key;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_head pool_threads;
static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -385,23 +393,18 @@ static struct list_head pool_free_threads;
static struct mem_pool_shared pools[NPOOLS];
static size_t pool_list_size;
-#if !defined(GF_DISABLE_MEMPOOL)
+static __thread per_thread_pool_list_t *thread_pool_list = NULL;
+
#define N_COLD_LISTS 1024
#define POOL_SWEEP_SECS 30
-static unsigned long sweep_times;
-static unsigned long sweep_usecs;
-static unsigned long frees_to_system;
-
typedef struct {
- struct list_head death_row;
pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
unsigned int n_cold_lists;
} sweep_state_t;
enum init_state {
GF_MEMPOOL_INIT_NONE = 0,
- GF_MEMPOOL_INIT_PREINIT,
GF_MEMPOOL_INIT_EARLY,
GF_MEMPOOL_INIT_LATE,
GF_MEMPOOL_INIT_DESTROY
@@ -412,36 +415,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int init_count = 0;
static pthread_t sweeper_tid;
-gf_boolean_t
+static bool
collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
{
unsigned int i;
per_thread_pool_t *pt_pool;
- gf_boolean_t poisoned;
(void)pthread_spin_lock(&pool_list->lock);
- poisoned = pool_list->poison != 0;
- if (!poisoned) {
- for (i = 0; i < NPOOLS; ++i) {
- pt_pool = &pool_list->pools[i];
- if (pt_pool->cold_list) {
- if (state->n_cold_lists >= N_COLD_LISTS) {
- break;
- }
- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
+ for (i = 0; i < NPOOLS; ++i) {
+ pt_pool = &pool_list->pools[i];
+ if (pt_pool->cold_list) {
+ if (state->n_cold_lists >= N_COLD_LISTS) {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ return true;
}
- pt_pool->cold_list = pt_pool->hot_list;
- pt_pool->hot_list = NULL;
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
}
+ pt_pool->cold_list = pt_pool->hot_list;
+ pt_pool->hot_list = NULL;
}
(void)pthread_spin_unlock(&pool_list->lock);
- return poisoned;
+ return false;
}
-void
+static void
free_obj_list(pooled_obj_hdr_t *victim)
{
pooled_obj_hdr_t *next;
@@ -450,93 +450,101 @@ free_obj_list(pooled_obj_hdr_t *victim)
next = victim->next;
free(victim);
victim = next;
- ++frees_to_system;
}
}
-void *
+static void *
pool_sweeper(void *arg)
{
sweep_state_t state;
per_thread_pool_list_t *pool_list;
- per_thread_pool_list_t *next_pl;
- per_thread_pool_t *pt_pool;
- unsigned int i;
- struct timeval begin_time;
- struct timeval end_time;
- struct timeval elapsed;
- gf_boolean_t poisoned;
+ uint32_t i;
+ bool pending;
/*
* This is all a bit inelegant, but the point is to avoid doing
* expensive things (like freeing thousands of objects) while holding a
- * global lock. Thus, we split each iteration into three passes, with
+ * global lock. Thus, we split each iteration into two passes, with
* only the first and fastest holding the lock.
*/
+ pending = true;
+
for (;;) {
- sleep(POOL_SWEEP_SECS);
+ /* If we know there's pending work to do (or this is the first run),
+ * we collect garbage more often. */
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
+
(void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
- INIT_LIST_HEAD(&state.death_row);
state.n_cold_lists = 0;
+ pending = false;
/* First pass: collect stuff that needs our attention. */
- (void)gettimeofday(&begin_time, NULL);
(void)pthread_mutex_lock(&pool_lock);
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list)
+ list_for_each_entry(pool_list, &pool_threads, thr_list)
{
- (void)pthread_mutex_unlock(&pool_lock);
- poisoned = collect_garbage(&state, pool_list);
- (void)pthread_mutex_lock(&pool_lock);
-
- if (poisoned) {
- list_move(&pool_list->thr_list, &state.death_row);
+ if (collect_garbage(&state, pool_list)) {
+ pending = true;
}
}
(void)pthread_mutex_unlock(&pool_lock);
- (void)gettimeofday(&end_time, NULL);
- timersub(&end_time, &begin_time, &elapsed);
- sweep_usecs += elapsed.tv_sec * 1000000 + elapsed.tv_usec;
- sweep_times += 1;
-
- /* Second pass: free dead pools. */
- (void)pthread_mutex_lock(&pool_free_lock);
- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list)
- {
- for (i = 0; i < NPOOLS; ++i) {
- pt_pool = &pool_list->pools[i];
- free_obj_list(pt_pool->cold_list);
- free_obj_list(pt_pool->hot_list);
- pt_pool->hot_list = pt_pool->cold_list = NULL;
- }
- list_del(&pool_list->thr_list);
- list_add(&pool_list->thr_list, &pool_free_threads);
- }
- (void)pthread_mutex_unlock(&pool_free_lock);
- /* Third pass: free cold objects from live pools. */
+ /* Second pass: free cold objects from live pools. */
for (i = 0; i < state.n_cold_lists; ++i) {
free_obj_list(state.cold_lists[i]);
}
(void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
}
+
+ return NULL;
}
void
-pool_destructor(void *arg)
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
- per_thread_pool_list_t *pool_list = arg;
+ per_thread_pool_t *pt_pool;
+ uint32_t i;
- /* The pool-sweeper thread will take it from here.
- *
- * We can change 'poison' here without taking locks because the change
- * itself doesn't interact with other parts of the code and a simple write
- * is already atomic from the point of view of the processor.
- *
- * This change can modify what mem_put() does, but both possibilities are
- * fine until the sweeper thread kicks in. The real synchronization must be
- * between mem_put() and the sweeper thread. */
- pool_list->poison = 1;
+ if (pool_list == NULL) {
+ pool_list = thread_pool_list;
+ }
+
+ /* The current thread is terminating. None of the allocated objects will
+ * be used again. We can directly destroy them here instead of delaying
+ * it until the next sweeper loop. */
+ if (pool_list != NULL) {
+ /* Remove pool_list from the global list so that the sweeper
+ * cannot touch it. */
+ pthread_mutex_lock(&pool_lock);
+ list_del(&pool_list->thr_list);
+ pthread_mutex_unlock(&pool_lock);
+
+ /* We need to protect hot/cold changes from potential mem_put() calls
+ * that reference this pool_list. Once poison is set to true, we are
+ * sure that no one else will touch the hot/cold lists. The only
+ * possible race is a mem_put() adding a new item to the hot list at
+ * that same moment; taking pool_list->lock protects against it.
+ * After that we don't need the lock to destroy the hot/cold lists. */
+ pthread_spin_lock(&pool_list->lock);
+ pool_list->poison = true;
+ pthread_spin_unlock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; i++) {
+ pt_pool = &pool_list->pools[i];
+
+ free_obj_list(pt_pool->hot_list);
+ pt_pool->hot_list = NULL;
+
+ free_obj_list(pt_pool->cold_list);
+ pt_pool->cold_list = NULL;
+ }
+
+ pthread_mutex_lock(&pool_free_lock);
+ list_add(&pool_list->thr_list, &pool_free_threads);
+ pthread_mutex_unlock(&pool_free_lock);
+
+ thread_pool_list = NULL;
+ }
}
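
The mem_put() side of this poison handshake is not shown in this hunk; the following is a simplified sketch of how a returned object observes the flag, based only on the fields used above (illustrative, not the patch's exact code):

static void
mem_put_sketch(per_thread_pool_list_t *pool_list, per_thread_pool_t *pt_pool,
               pooled_obj_hdr_t *hdr)
{
    (void)pthread_spin_lock(&pool_list->lock);
    if (!pool_list->poison) {
        /* Owner thread still alive: push onto its hot list for reuse. */
        hdr->next = pt_pool->hot_list;
        pt_pool->hot_list = hdr;
        hdr = NULL;
    }
    (void)pthread_spin_unlock(&pool_list->lock);

    if (hdr != NULL) {
        /* Owner already ran the destructor: return directly to the system. */
        free(hdr);
    }
}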
static __attribute__((constructor)) void
@@ -559,46 +567,30 @@ mem_pools_preinit(void)
pool_list_size = sizeof(per_thread_pool_list_t) +
sizeof(per_thread_pool_t) * (NPOOLS - 1);
- init_done = GF_MEMPOOL_INIT_PREINIT;
+ init_done = GF_MEMPOOL_INIT_EARLY;
}
-/* Use mem_pools_init_early() function for basic initialization. There will be
- * no cleanup done by the pool_sweeper thread until mem_pools_init_late() has
- * been called. Calling mem_get() will be possible after this function has
- * setup the basic structures. */
-void
-mem_pools_init_early(void)
+static __attribute__((destructor)) void
+mem_pools_postfini(void)
{
- pthread_mutex_lock(&init_mutex);
- /* Use a pthread_key destructor to clean up when a thread exits.
+ /* TODO: This function should destroy all per-thread memory pools that
+ * are still alive, but this is not possible right now because glibc
+ * starts calling destructors as soon as exit() is called, and
+ * gluster doesn't ensure that all threads have been stopped before
+ * calling exit(). If we destroyed things here, surviving threads
+ * would crash as soon as they tried to use memory or to terminate.
*
- * We won't increase init_count here, that is only done when the
- * pool_sweeper thread is started too.
- */
- if (init_done == GF_MEMPOOL_INIT_PREINIT ||
- init_done == GF_MEMPOOL_INIT_DESTROY) {
- /* key has not been created yet */
- if (pthread_key_create(&pool_key, pool_destructor) != 0) {
- gf_log("mem-pool", GF_LOG_CRITICAL,
- "failed to initialize mem-pool key");
- }
-
- init_done = GF_MEMPOOL_INIT_EARLY;
- } else {
- gf_log("mem-pool", GF_LOG_CRITICAL,
- "incorrect order of mem-pool initialization "
- "(init_done=%d)",
- init_done);
- }
-
- pthread_mutex_unlock(&init_mutex);
+ * When we properly terminate all threads, we can add the needed
+ * code here. Until then we need to leave the memory allocated. Most
+ * likely this function will be executed on process termination, so
+ * the memory will be released by the system anyway. */
}
-/* Call mem_pools_init_late() once threading has been configured completely.
- * This prevent the pool_sweeper thread from getting killed once the main()
- * thread exits during deamonizing. */
+/* Call mem_pools_init() once threading has been configured completely. This
+ * prevents the pool_sweeper thread from getting killed once the main() thread
+ * exits during daemonizing. */
void
-mem_pools_init_late(void)
+mem_pools_init(void)
{
pthread_mutex_lock(&init_mutex);
if ((init_count++) == 0) {
@@ -617,56 +609,27 @@ mem_pools_fini(void)
switch (init_count) {
case 0:
/*
- * If init_count is already zero (as e.g. if somebody called
- * this before mem_pools_init_late) then the sweeper was
- * probably never even started so we don't need to stop it.
- * Even if there's some crazy circumstance where there is a
- * sweeper but init_count is still zero, that just means we'll
- * leave it running. Not perfect, but far better than any
- * known alternative.
+ * If init_count is already zero (as e.g. if somebody called this
+ * before mem_pools_init) then the sweeper was probably never even
+ * started so we don't need to stop it. Even if there's some crazy
+ * circumstance where there is a sweeper but init_count is still
+ * zero, that just means we'll leave it running. Not perfect, but
+ * far better than any known alternative.
*/
break;
case 1: {
- per_thread_pool_list_t *pool_list;
- per_thread_pool_list_t *next_pl;
- unsigned int i;
-
- /* if only mem_pools_init_early() was called, sweeper_tid will
- * be invalid and the functions will error out. That is not
- * critical. In all other cases, the sweeper_tid will be valid
- * and the thread gets stopped. */
+ /* if mem_pools_init() was not called, sweeper_tid will be invalid
+ * and the functions will error out. That is not critical. In all
+ * other cases, the sweeper_tid will be valid and the thread gets
+ * stopped. */
(void)pthread_cancel(sweeper_tid);
(void)pthread_join(sweeper_tid, NULL);
- /* Need to clean the pool_key to prevent further usage of the
- * per_thread_pool_list_t structure that is stored for each
- * thread.
- * This also prevents calling pool_destructor() when a thread
- * exits, so there is no chance on a use-after-free of the
- * per_thread_pool_list_t structure. */
- (void)pthread_key_delete(pool_key);
-
- /* free all objects from all pools */
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads,
- thr_list)
- {
- for (i = 0; i < NPOOLS; ++i) {
- free_obj_list(pool_list->pools[i].hot_list);
- free_obj_list(pool_list->pools[i].cold_list);
- pool_list->pools[i].hot_list = NULL;
- pool_list->pools[i].cold_list = NULL;
- }
-
- list_del(&pool_list->thr_list);
- FREE(pool_list);
- }
-
- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads,
- thr_list)
- {
- list_del(&pool_list->thr_list);
- FREE(pool_list);
- }
+ /* There could be threads still running in some cases, so we can't
+ * destroy pool_lists that are in use. Nor can we destroy unused
+ * pool_lists, because some allocated objects may still be pointing
+ * to them. */
+ mem_pool_thread_destructor(NULL);
init_done = GF_MEMPOOL_INIT_DESTROY;
/* Fall through. */
@@ -677,26 +640,36 @@ mem_pools_fini(void)
pthread_mutex_unlock(&init_mutex);
}
-#else
-void
-mem_pools_init_early(void)
-{
-}
void
-mem_pools_init_late(void)
-{
-}
-void
-mem_pools_fini(void)
+mem_pool_destroy(struct mem_pool *pool)
{
+ if (!pool)
+ return;
+
+ /* remove this pool from the owner (glusterfs_ctx_t) */
+ LOCK(&pool->ctx->lock);
+ {
+ list_del(&pool->owner);
+ }
+ UNLOCK(&pool->ctx->lock);
+
+ /* free this pool, but keep the mem_pool_shared */
+ GF_FREE(pool);
+
+ /*
+ * Pools are now permanent, so the mem_pool->pool is kept around. All
+ * of the objects *in* the pool will eventually be freed via the
+ * pool-sweeper thread, and this way we don't have to add a lot of
+ * reference-counting complexity.
+ */
}
-#endif
struct mem_pool *
mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
unsigned long count, char *name)
{
- unsigned int i;
+ unsigned long extra_size, size;
+ unsigned int power;
struct mem_pool *new = NULL;
struct mem_pool_shared *pool = NULL;
@@ -706,20 +679,33 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
return NULL;
}
- for (i = 0; i < NPOOLS; ++i) {
- if (sizeof_type <= AVAILABLE_SIZE(pools[i].power_of_two)) {
- pool = &pools[i];
- break;
- }
- }
-
- if (!pool) {
+ /* This is the per-block overhead we'll incur because of memory
+ * accounting. */
+ extra_size = sizeof(pooled_obj_hdr_t);
+
+ /* We need to compute the total space needed to hold the data type and
+ * the header. Given that the smallest block size we have in the pools
+ * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST).
+ * However, since this value is only needed to compute its rounded
+ * logarithm in base 2, which only depends on the highest bit set,
+ * we can simply do a bitwise OR with (2^POOL_SMALLEST - 1). We also
+ * subtract 1 from the total so that sizes that are exactly a power
+ * of 2 map to their own pool. */
+ size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL);
+
+ /* We compute the base-2 logarithm of the resulting size, rounded up.
+ * This value identifies which of the power-of-2 pools we need to use.
+ * It is equivalent to finding the position of the highest bit set in
+ * the size. */
+ power = sizeof(size) * 8 - __builtin_clzl(size);
+ if (power > POOL_LARGEST) {
gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
"invalid argument");
return NULL;
}
+ pool = &pools[power - POOL_SMALLEST];
- new = GF_CALLOC(sizeof(struct mem_pool), 1, gf_common_mt_mem_pool);
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
if (!new)
return NULL;
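
The bit trick above can be checked in isolation. A standalone sketch (GCC/Clang builtins; POOL_SMALLEST == 7 matches the old define, and the 32-byte header size is an assumption for illustration):

#include <stdio.h>

/* Mirror of the pool-selection logic, assuming POOL_SMALLEST == 7. */
static unsigned int
pool_power(unsigned long sizeof_type, unsigned long extra_size)
{
    unsigned long size;

    /* OR with (2^7 - 1) enforces the 128-byte minimum; the -1 makes exact
     * powers of two land in their own pool instead of the next one up. */
    size = (sizeof_type + extra_size - 1UL) | ((1UL << 7) - 1UL);

    /* Position of the highest set bit == rounded-up base-2 logarithm. */
    return sizeof(size) * 8 - __builtin_clzl(size);
}

int
main(void)
{
    printf("%u\n", pool_power(100, 32)); /* 132 bytes -> 256 -> power 8 */
    printf("%u\n", pool_power(96, 32));  /* 128 bytes exactly -> power 7 */
    return 0;
}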
@@ -727,8 +713,13 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
new->sizeof_type = sizeof_type;
new->count = count;
new->name = name;
+ new->xl_name = THIS->name;
new->pool = pool;
GF_ATOMIC_INIT(new->active, 0);
+#ifdef DEBUG
+ GF_ATOMIC_INIT(new->hit, 0);
+ GF_ATOMIC_INIT(new->miss, 0);
+#endif
INIT_LIST_HEAD(&new->owner);
LOCK(&ctx->lock);
@@ -740,36 +731,13 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
return new;
}
-void *
-mem_get0(struct mem_pool *mem_pool)
-{
- void *ptr = NULL;
-
- if (!mem_pool) {
- gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
- "invalid argument");
- return NULL;
- }
-
- ptr = mem_get(mem_pool);
- if (ptr) {
-#if defined(GF_DISABLE_MEMPOOL)
- memset(ptr, 0, mem_pool->sizeof_type);
-#else
- memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
-#endif
- }
-
- return ptr;
-}
-
per_thread_pool_list_t *
mem_get_pool_list(void)
{
per_thread_pool_list_t *pool_list;
unsigned int i;
- pool_list = pthread_getspecific(pool_key);
+ pool_list = thread_pool_list;
if (pool_list) {
return pool_list;
}
@@ -797,21 +765,33 @@ mem_get_pool_list(void)
}
}
+ /* There's no need to take pool_list->lock, because this is already an
+ * atomic operation and we don't need to synchronize it with any change
+ * in hot/cold lists. */
+ pool_list->poison = false;
+
(void)pthread_mutex_lock(&pool_lock);
- pool_list->poison = 0;
list_add(&pool_list->thr_list, &pool_threads);
(void)pthread_mutex_unlock(&pool_lock);
- (void)pthread_setspecific(pool_key, pool_list);
+ thread_pool_list = pool_list;
+
+ /* Ensure that all memory objects associated with the new pool_list are
+ * destroyed when the thread terminates. */
+ gf_thread_needs_cleanup();
+
return pool_list;
}
-pooled_obj_hdr_t *
+static pooled_obj_hdr_t *
mem_get_from_pool(struct mem_pool *mem_pool)
{
per_thread_pool_list_t *pool_list;
per_thread_pool_t *pt_pool;
pooled_obj_hdr_t *retval;
+#ifdef DEBUG
+ gf_boolean_t hit = _gf_true;
+#endif
pool_list = mem_get_pool_list();
if (!pool_list || pool_list->poison) {
@@ -837,34 +817,58 @@ mem_get_from_pool(struct mem_pool *mem_pool)
(void)pthread_spin_unlock(&pool_list->lock);
GF_ATOMIC_INC(pt_pool->parent->allocs_stdc);
retval = malloc(1 << pt_pool->parent->power_of_two);
+#ifdef DEBUG
+ hit = _gf_false;
+#endif
}
}
if (retval != NULL) {
- retval->magic = GF_MEM_HEADER_MAGIC;
retval->pool = mem_pool;
- retval->pool_list = pool_list;
retval->power_of_two = mem_pool->pool->power_of_two;
+#ifdef DEBUG
+ if (hit == _gf_true)
+ GF_ATOMIC_INC(mem_pool->hit);
+ else
+ GF_ATOMIC_INC(mem_pool->miss);
+#endif
+ retval->magic = GF_MEM_HEADER_MAGIC;
+ retval->pool_list = pool_list;
}
return retval;
}
+#endif /* GF_DISABLE_MEMPOOL */
+
void *
-mem_get(struct mem_pool *mem_pool)
+mem_get0(struct mem_pool *mem_pool)
{
+ void *ptr = mem_get(mem_pool);
+ if (ptr) {
#if defined(GF_DISABLE_MEMPOOL)
- return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+ memset(ptr, 0, mem_pool->sizeof_type);
#else
- pooled_obj_hdr_t *retval;
+ memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
+#endif
+ }
+ return ptr;
+}
+
+void *
+mem_get(struct mem_pool *mem_pool)
+{
if (!mem_pool) {
gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
"invalid argument");
return NULL;
}
- retval = mem_get_from_pool(mem_pool);
+#if defined(GF_DISABLE_MEMPOOL)
+ return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+#else
+ pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool);
if (!retval) {
return NULL;
}
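
End to end, the pool API touched by this patch can be exercised as in the sketch below (illustrative only; my_obj_t and the count of 64 are arbitrary choices, not from the patch):

typedef struct {
    int id;
    char name[48];
} my_obj_t;

static void
pool_example(glusterfs_ctx_t *ctx)
{
    struct mem_pool *pool;
    my_obj_t *obj;

    pool = mem_pool_new_fn(ctx, sizeof(my_obj_t), 64, "my_obj_t");
    if (!pool)
        return;

    obj = mem_get0(pool); /* zero-filled object from the per-thread pool */
    if (obj) {
        obj->id = 1;
        mem_put(obj); /* returns the object to its pool for reuse */
    }

    mem_pool_destroy(pool); /* frees the descriptor; shared pools persist */
}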
@@ -896,10 +900,19 @@ mem_put(void *ptr)
/* Not one of ours; don't touch it. */
return;
}
+
+ if (!hdr->pool_list) {
+ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL,
+ LG_MSG_INVALID_ARG,
+ "invalid argument hdr->pool_list NULL");
+ return;
+ }
+
pool_list = hdr->pool_list;
pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST];
- GF_ATOMIC_DEC(hdr->pool->active);
+ if (hdr->pool)
+ GF_ATOMIC_DEC(hdr->pool->active);
hdr->magic = GF_MEM_INVALID_MAGIC;
@@ -917,27 +930,3 @@ mem_put(void *ptr)
}
#endif /* GF_DISABLE_MEMPOOL */
}
-
-void
-mem_pool_destroy(struct mem_pool *pool)
-{
- if (!pool)
- return;
-
- /* remove this pool from the owner (glusterfs_ctx_t) */
- LOCK(&pool->ctx->lock);
- {
- list_del(&pool->owner);
- }
- UNLOCK(&pool->ctx->lock);
-
- /* free this pool, but keep the mem_pool_shared */
- GF_FREE(pool);
-
- /*
- * Pools are now permanent, so the mem_pool->pool is kept around. All
- * of the objects *in* the pool will eventually be freed via the
- * pool-sweeper thread, and this way we don't have to add a lot of
- * reference-counting complexity.
- */
-}