diff options
Diffstat (limited to 'xlators/cluster/ec/src/ec-code.c')
| -rw-r--r-- | xlators/cluster/ec/src/ec-code.c | 380 |
1 files changed, 268 insertions, 112 deletions
diff --git a/xlators/cluster/ec/src/ec-code.c b/xlators/cluster/ec/src/ec-code.c index a1f652779f3..03162ae05a9 100644 --- a/xlators/cluster/ec/src/ec-code.c +++ b/xlators/cluster/ec/src/ec-code.c @@ -14,12 +14,13 @@ #include <sys/stat.h> #include <ctype.h> -#include "syscall.h" +#include <glusterfs/syscall.h> #include "ec-mem-types.h" #include "ec-code.h" #include "ec-messages.h" #include "ec-code-c.h" +#include "ec-helpers.h" #ifdef USE_EC_DYNAMIC_X64 #include "ec-code-x64.h" @@ -33,6 +34,11 @@ #include "ec-code-avx.h" #endif +#define EC_CODE_SIZE (1024 * 64) +#define EC_CODE_ALIGN 4096 + +#define EC_CODE_CHUNK_MIN_SIZE 512 + #define EC_PROC_BUFFER_SIZE 4096 #define PROC_CPUINFO "/proc/cpuinfo" @@ -41,13 +47,13 @@ struct _ec_code_proc; typedef struct _ec_code_proc ec_code_proc_t; struct _ec_code_proc { - int32_t fd; + int32_t fd; gf_boolean_t eof; gf_boolean_t error; gf_boolean_t skip; - ssize_t size; - ssize_t pos; - char buffer[EC_PROC_BUFFER_SIZE]; + ssize_t size; + ssize_t pos; + char buffer[EC_PROC_BUFFER_SIZE]; }; static ec_code_gen_t *ec_code_gen_table[] = { @@ -60,8 +66,7 @@ static ec_code_gen_t *ec_code_gen_table[] = { #ifdef USE_EC_DYNAMIC_X64 &ec_code_gen_x64, #endif - NULL -}; + NULL}; static void ec_code_arg_set(ec_code_arg_t *arg, uint32_t value) @@ -78,7 +83,6 @@ ec_code_arg_assign(ec_code_builder_t *builder, ec_code_op_t *op, if (builder->regs <= reg) { builder->regs = reg + 1; } - } static void @@ -196,17 +200,17 @@ static void ec_code_dup(ec_code_builder_t *builder, ec_gf_op_t *op) { switch (op->op) { - case EC_GF_OP_COPY: - ec_code_copy(builder, op->arg1, op->arg2); - break; - case EC_GF_OP_XOR2: - ec_code_xor2(builder, op->arg1, op->arg2); - break; - case EC_GF_OP_XOR3: - ec_code_xor3(builder, op->arg1, op->arg2, op->arg3); - break; - default: - break; + case EC_GF_OP_COPY: + ec_code_copy(builder, op->arg1, op->arg2); + break; + case EC_GF_OP_XOR2: + ec_code_xor2(builder, op->arg1, op->arg2); + break; + case EC_GF_OP_XOR3: + ec_code_xor3(builder, op->arg1, op->arg2, op->arg3); + break; + default: + break; } } @@ -279,10 +283,11 @@ ec_code_prepare(ec_code_t *code, uint32_t count, uint32_t width, count *= code->gf->bits + code->gf->max_ops; count += code->gf->bits; - builder = GF_MALLOC(sizeof(ec_code_builder_t) + - sizeof(ec_code_op_t) * count, ec_mt_ec_code_builder_t); + builder = GF_MALLOC( + sizeof(ec_code_builder_t) + sizeof(ec_code_op_t) * count, + ec_mt_ec_code_builder_t); if (builder == NULL) { - return NULL; + return EC_ERR(ENOMEM); } builder->address = 0; @@ -323,15 +328,39 @@ ec_code_chunk_from_space(ec_code_space_t *space) } static void * -ec_code_func_from_chunk(ec_code_chunk_t *chunk) +ec_code_to_executable(ec_code_space_t *space, void *addr) +{ + return (void *)((uintptr_t)addr - (uintptr_t)space + + (uintptr_t)space->exec); +} + +static void * +ec_code_from_executable(ec_code_space_t *space, void *addr) { - return (void *)((uintptr_t)chunk + ec_code_chunk_size()); + return (void *)((uintptr_t)addr - (uintptr_t)space->exec + + (uintptr_t)space); +} + +static void * +ec_code_func_from_chunk(ec_code_chunk_t *chunk, void **exec) +{ + void *addr; + + addr = (void *)((uintptr_t)chunk + ec_code_chunk_size()); + + *exec = ec_code_to_executable(chunk->space, addr); + + return addr; } static ec_code_chunk_t * ec_code_chunk_from_func(ec_code_func_linear_t func) { - return (ec_code_chunk_t *)((uintptr_t)func - ec_code_chunk_size()); + ec_code_chunk_t *chunk; + + chunk = (ec_code_chunk_t *)((uintptr_t)func - ec_code_chunk_size()); + + return ec_code_from_executable(chunk->space, chunk); } static ec_code_chunk_t * @@ -343,6 +372,7 @@ ec_code_chunk_split(ec_code_chunk_t *chunk, size_t size) avail = chunk->size - size - ec_code_chunk_size(); if (avail > 0) { extra = (ec_code_chunk_t *)((uintptr_t)chunk + chunk->size - avail); + extra->space = chunk->space; extra->size = avail; list_add(&extra->list, &chunk->list); chunk->size = size; @@ -361,18 +391,117 @@ ec_code_chunk_touch(ec_code_chunk_t *prev, ec_code_chunk_t *next) return (end == (uintptr_t)next); } +static ec_code_space_t * +ec_code_space_create(ec_code_t *code, size_t size) +{ + char path[] = GLUSTERFS_LIBEXECDIR "/ec-code-dynamic.XXXXXX"; + ec_code_space_t *space; + void *exec; + int32_t fd, err; + + /* We need to create memory areas to store the generated dynamic code. + * Obviously these areas need to be written to be able to create the + * code and they also need to be executable to execute it. + * + * However it's a bad practice to have a memory region that is both + * writable *and* executable. In fact, selinux forbids this and causes + * attempts to do so to fail (unless specifically configured). + * + * To solve the problem we'll use two distinct memory areas mapped to + * the same physical storage. One of the memory areas will have write + * permission, and the other will have execute permission. Both areas + * will have the same contents. The physical storage will be a regular + * file that will be mmapped to both areas. + */ + + /* We need to create a temporary file as the backend storage for the + * memory mapped areas. */ + /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ + fd = mkstemp(path); + if (fd < 0) { + err = errno; + gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, + "Unable to create a temporary file for the ec dynamic " + "code"); + space = EC_ERR(err); + goto done; + } + /* Once created we don't need to keep it in the file system. It will + * still exist until we close the last file descriptor or unmap the + * memory areas bound to the file. */ + sys_unlink(path); + + size = (size + EC_CODE_ALIGN - 1) & ~(EC_CODE_ALIGN - 1); + if (sys_ftruncate(fd, size) < 0) { + err = errno; + gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, + "Unable to resize the file for the ec dynamic code"); + space = EC_ERR(err); + goto done_close; + } + + /* This creates an executable memory area to be able to run the + * generated fragments of code. */ + exec = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + if (exec == MAP_FAILED) { + err = errno; + gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, + "Unable to map the executable area for the ec dynamic " + "code"); + space = EC_ERR(err); + goto done_close; + } + /* It's not important to check the return value of mlock(). If it fails + * everything will continue to work normally. */ + mlock(exec, size); + + /* This maps a read/write memory area to be able to create the dynamici + * code. */ + space = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (space == MAP_FAILED) { + err = errno; + gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, + "Unable to map the writable area for the ec dynamic " + "code"); + space = EC_ERR(err); + + munmap(exec, size); + + goto done_close; + } + + space->exec = exec; + space->size = size; + space->code = code; + list_add_tail(&space->list, &code->spaces); + INIT_LIST_HEAD(&space->chunks); + +done_close: + /* If everything has succeeded, we already have the memory areas + * mapped. We don't need the file descriptor anymore because the + * backend storage will be there until the mmap()'d regions are + * unmapped. */ + sys_close(fd); +done: + return space; +} + static void -ec_code_chunk_merge(ec_code_chunk_t *chunk) +ec_code_space_destroy(ec_code_space_t *space) { - ec_code_chunk_t *item; + list_del_init(&space->list); - list_for_each_entry(item, &chunk->space->chunks, list) { - if (ec_code_chunk_touch(item, chunk)) { - item->size += chunk->size + ec_code_chunk_size(); - chunk = item; + munmap(space->exec, space->size); + munmap(space, space->size); +} - goto check; - } +static void +ec_code_chunk_merge(ec_code_chunk_t *chunk) +{ + ec_code_chunk_t *item, *tmp; + + list_for_each_entry_safe(item, tmp, &chunk->space->chunks, list) + { if ((uintptr_t)item > (uintptr_t)chunk) { list_add_tail(&chunk->list, &item->list); if (ec_code_chunk_touch(chunk, item)) { @@ -382,15 +511,18 @@ ec_code_chunk_merge(ec_code_chunk_t *chunk) goto check; } + if (ec_code_chunk_touch(item, chunk)) { + item->size += chunk->size + ec_code_chunk_size(); + list_del_init(&item->list); + chunk = item; + } } list_add_tail(&chunk->list, &chunk->space->chunks); check: - if (chunk->size == EC_CODE_SIZE - ec_code_space_size() - - ec_code_chunk_size()) { - list_del_init(&chunk->space->list); - - munmap(chunk->space, chunk->space->size); + if (chunk->size == + chunk->space->size - ec_code_space_size() - ec_code_chunk_size()) { + ec_code_space_destroy(chunk->space); } } @@ -401,35 +533,32 @@ ec_code_space_alloc(ec_code_t *code, size_t size) ec_code_chunk_t *chunk; size_t map_size; - size = (size + 15) & ~15; - list_for_each_entry(space, &code->spaces, list) { - list_for_each_entry(chunk, &space->chunks, list) { + /* To minimize fragmentation, we only allocate chunks of sizes multiples + * of EC_CODE_CHUNK_MIN_SIZE. */ + size = ((size + ec_code_chunk_size() + EC_CODE_CHUNK_MIN_SIZE - 1) & + ~(EC_CODE_CHUNK_MIN_SIZE - 1)) - + ec_code_chunk_size(); + list_for_each_entry(space, &code->spaces, list) + { + list_for_each_entry(chunk, &space->chunks, list) + { if (chunk->size >= size) { goto out; } } } - map_size = EC_CODE_SIZE; + map_size = EC_CODE_SIZE - ec_code_space_size() - ec_code_chunk_size(); if (map_size < size) { map_size = size; } - space = mmap(NULL, map_size, PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (space == NULL) { - return NULL; + space = ec_code_space_create(code, map_size); + if (EC_IS_ERR(space)) { + return (ec_code_chunk_t *)space; } - /* It's not important to check the return value of mlock(). If it fails - * everything will continue to work normally. */ - mlock(space, map_size); - - space->code = code; - space->size = map_size; - list_add_tail(&space->list, &code->spaces); - INIT_LIST_HEAD(&space->chunks); chunk = ec_code_chunk_from_space(space); - chunk->size = EC_CODE_SIZE - ec_code_space_size() - ec_code_chunk_size(); + chunk->size = map_size - ec_code_space_size() - ec_code_chunk_size(); list_add(&chunk->list, &space->chunks); out: @@ -465,7 +594,7 @@ ec_code_free(ec_code_chunk_t *chunk) UNLOCK(lock); } -static gf_boolean_t +static int32_t ec_code_write(ec_code_builder_t *builder) { ec_code_gen_t *gen; @@ -482,31 +611,34 @@ ec_code_write(ec_code_builder_t *builder) for (i = 0; i < builder->count; i++) { op = &builder->ops[i]; switch (op->op) { - case EC_GF_OP_LOAD: - gen->load(builder, op->arg1.value, op->arg2.value, op->arg3.value); - break; - case EC_GF_OP_STORE: - gen->store(builder, op->arg1.value, op->arg3.value); - break; - case EC_GF_OP_COPY: - gen->copy(builder, op->arg1.value, op->arg2.value); - break; - case EC_GF_OP_XOR2: - gen->xor2(builder, op->arg1.value, op->arg2.value); - break; - case EC_GF_OP_XOR3: - gen->xor3(builder, op->arg1.value, op->arg2.value, op->arg3.value); - break; - case EC_GF_OP_XORM: - gen->xorm(builder, op->arg1.value, op->arg2.value, op->arg3.value); - break; - default: - break; + case EC_GF_OP_LOAD: + gen->load(builder, op->arg1.value, op->arg2.value, + op->arg3.value); + break; + case EC_GF_OP_STORE: + gen->store(builder, op->arg1.value, op->arg3.value); + break; + case EC_GF_OP_COPY: + gen->copy(builder, op->arg1.value, op->arg2.value); + break; + case EC_GF_OP_XOR2: + gen->xor2(builder, op->arg1.value, op->arg2.value); + break; + case EC_GF_OP_XOR3: + gen->xor3(builder, op->arg1.value, op->arg2.value, + op->arg3.value); + break; + case EC_GF_OP_XORM: + gen->xorm(builder, op->arg1.value, op->arg2.value, + op->arg3.value); + break; + default: + break; } } gen->epilog(builder); - return builder->error == 0; + return builder->error; } static void * @@ -514,22 +646,24 @@ ec_code_compile(ec_code_builder_t *builder) { ec_code_chunk_t *chunk; void *func; + int32_t err; - if (!ec_code_write(builder)) { - return NULL; + err = ec_code_write(builder); + if (err != 0) { + return EC_ERR(err); } chunk = ec_code_alloc(builder->code, builder->size); - if (chunk == NULL) { - return NULL; + if (EC_IS_ERR(chunk)) { + return chunk; } - func = ec_code_func_from_chunk(chunk); - builder->data = (uint8_t *)func; + builder->data = ec_code_func_from_chunk(chunk, &func); - if (!ec_code_write(builder)) { + err = ec_code_write(builder); + if (err != 0) { ec_code_free(chunk); - return NULL; + return EC_ERR(err); } GF_FREE(builder); @@ -544,7 +678,7 @@ ec_code_create(ec_gf_t *gf, ec_code_gen_t *gen) code = GF_MALLOC(sizeof(ec_code_t), ec_mt_ec_code_t); if (code == NULL) { - return NULL; + return EC_ERR(ENOMEM); } memset(code, 0, sizeof(ec_code_t)); INIT_LIST_HEAD(&code->spaces); @@ -552,11 +686,6 @@ ec_code_create(ec_gf_t *gf, ec_code_gen_t *gen) code->gf = gf; code->gen = gen; - if (gen == NULL) { - code->width = sizeof(uint64_t); - } else { - code->width = gen->width; - } return code; } @@ -589,25 +718,16 @@ ec_code_value_next(uint32_t *values, uint32_t count, uint32_t *offset) return next; } -void * -ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values, - uint32_t count, gf_boolean_t linear) +static void * +ec_code_build_dynamic(ec_code_t *code, uint32_t width, uint32_t *values, + uint32_t count, gf_boolean_t linear) { ec_code_builder_t *builder; uint32_t offset, val, next; - if (code->gen == NULL) { - ec_code_c_prepare(code->gf, values, count); - if (linear) { - return ec_code_c_linear; - } else { - return ec_code_c_interleaved; - } - } - builder = ec_code_prepare(code, count, width, linear); - if (builder == NULL) { - return NULL; + if (EC_IS_ERR(builder)) { + return builder; } offset = -1; @@ -631,6 +751,38 @@ ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values, return ec_code_compile(builder); } +static void * +ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count, + gf_boolean_t linear) +{ + void *func; + + if (code->gen != NULL) { + func = ec_code_build_dynamic(code, width, values, count, linear); + if (!EC_IS_ERR(func)) { + return func; + } + + gf_msg_debug(THIS->name, GF_LOG_DEBUG, + "Unable to generate dynamic code. Falling back " + "to precompiled code"); + + /* The dynamic code generation shouldn't fail in normal + * conditions, but if it fails at some point, it's very + * probable that it will fail again, so we completely disable + * dynamic code generation. */ + code->gen = NULL; + } + + ec_code_c_prepare(code->gf, values, count); + + if (linear) { + return ec_code_c_linear; + } + + return ec_code_c_interleaved; +} + ec_code_func_linear_t ec_code_build_linear(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count) @@ -643,14 +795,15 @@ ec_code_func_interleaved_t ec_code_build_interleaved(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count) { - return (ec_code_func_interleaved_t)ec_code_build(code, width, values, - count, _gf_false); + return (ec_code_func_interleaved_t)ec_code_build(code, width, values, count, + _gf_false); } void ec_code_release(ec_code_t *code, ec_code_func_t *func) { - if (code->gen != NULL) { + if ((func->linear != ec_code_c_linear) && + (func->interleaved != ec_code_c_interleaved)) { ec_code_free(ec_code_chunk_from_func(func->linear)); } } @@ -659,6 +812,8 @@ void ec_code_error(ec_code_builder_t *builder, int32_t error) { if (builder->error == 0) { + gf_msg(THIS->name, GF_LOG_ERROR, error, EC_MSG_DYN_CODEGEN_FAILED, + "Failed to generate dynamic code"); builder->error = error; } } @@ -789,7 +944,7 @@ ec_code_cpu_check(uint32_t idx, char *list, uint32_t count) { ec_code_gen_t *gen; char **ptr; - char *table[count]; + char *table[count + 1]; uint32_t i; for (i = 0; i < count; i++) { @@ -834,7 +989,7 @@ ec_code_detect(xlator_t *xl, const char *def) return NULL; } - file.fd = sys_openat(AT_FDCWD, PROC_CPUINFO, O_RDONLY); + file.fd = sys_open(PROC_CPUINFO, O_RDONLY, 0); if (file.fd < 0) { goto out; } @@ -852,7 +1007,8 @@ ec_code_detect(xlator_t *xl, const char *def) if (ec_code_gen_table[select] == NULL) { gf_msg(xl->name, GF_LOG_WARNING, EINVAL, EC_MSG_EXTENSION_UNKNOWN, "CPU extension '%s' is not known. Not using any cpu " - "extensions", def); + "extensions", + def); return NULL; } @@ -882,7 +1038,7 @@ ec_code_detect(xlator_t *xl, const char *def) if (file.error) { gf_msg(xl->name, GF_LOG_WARNING, 0, EC_MSG_EXTENSION_FAILED, - "Unable to detemine supported CPU extensions. Not using any " + "Unable to determine supported CPU extensions. Not using any " "cpu extensions"); gen = NULL; |
