summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-code.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src/ec-code.c')
-rw-r--r--xlators/cluster/ec/src/ec-code.c380
1 files changed, 268 insertions, 112 deletions
diff --git a/xlators/cluster/ec/src/ec-code.c b/xlators/cluster/ec/src/ec-code.c
index a1f652779f3..03162ae05a9 100644
--- a/xlators/cluster/ec/src/ec-code.c
+++ b/xlators/cluster/ec/src/ec-code.c
@@ -14,12 +14,13 @@
#include <sys/stat.h>
#include <ctype.h>
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "ec-mem-types.h"
#include "ec-code.h"
#include "ec-messages.h"
#include "ec-code-c.h"
+#include "ec-helpers.h"
#ifdef USE_EC_DYNAMIC_X64
#include "ec-code-x64.h"
@@ -33,6 +34,11 @@
#include "ec-code-avx.h"
#endif
+#define EC_CODE_SIZE (1024 * 64)
+#define EC_CODE_ALIGN 4096
+
+#define EC_CODE_CHUNK_MIN_SIZE 512
+
#define EC_PROC_BUFFER_SIZE 4096
#define PROC_CPUINFO "/proc/cpuinfo"
@@ -41,13 +47,13 @@ struct _ec_code_proc;
typedef struct _ec_code_proc ec_code_proc_t;
struct _ec_code_proc {
- int32_t fd;
+ int32_t fd;
gf_boolean_t eof;
gf_boolean_t error;
gf_boolean_t skip;
- ssize_t size;
- ssize_t pos;
- char buffer[EC_PROC_BUFFER_SIZE];
+ ssize_t size;
+ ssize_t pos;
+ char buffer[EC_PROC_BUFFER_SIZE];
};
static ec_code_gen_t *ec_code_gen_table[] = {
@@ -60,8 +66,7 @@ static ec_code_gen_t *ec_code_gen_table[] = {
#ifdef USE_EC_DYNAMIC_X64
&ec_code_gen_x64,
#endif
- NULL
-};
+ NULL};
static void
ec_code_arg_set(ec_code_arg_t *arg, uint32_t value)
@@ -78,7 +83,6 @@ ec_code_arg_assign(ec_code_builder_t *builder, ec_code_op_t *op,
if (builder->regs <= reg) {
builder->regs = reg + 1;
}
-
}
static void
@@ -196,17 +200,17 @@ static void
ec_code_dup(ec_code_builder_t *builder, ec_gf_op_t *op)
{
switch (op->op) {
- case EC_GF_OP_COPY:
- ec_code_copy(builder, op->arg1, op->arg2);
- break;
- case EC_GF_OP_XOR2:
- ec_code_xor2(builder, op->arg1, op->arg2);
- break;
- case EC_GF_OP_XOR3:
- ec_code_xor3(builder, op->arg1, op->arg2, op->arg3);
- break;
- default:
- break;
+ case EC_GF_OP_COPY:
+ ec_code_copy(builder, op->arg1, op->arg2);
+ break;
+ case EC_GF_OP_XOR2:
+ ec_code_xor2(builder, op->arg1, op->arg2);
+ break;
+ case EC_GF_OP_XOR3:
+ ec_code_xor3(builder, op->arg1, op->arg2, op->arg3);
+ break;
+ default:
+ break;
}
}
@@ -279,10 +283,11 @@ ec_code_prepare(ec_code_t *code, uint32_t count, uint32_t width,
count *= code->gf->bits + code->gf->max_ops;
count += code->gf->bits;
- builder = GF_MALLOC(sizeof(ec_code_builder_t) +
- sizeof(ec_code_op_t) * count, ec_mt_ec_code_builder_t);
+ builder = GF_MALLOC(
+ sizeof(ec_code_builder_t) + sizeof(ec_code_op_t) * count,
+ ec_mt_ec_code_builder_t);
if (builder == NULL) {
- return NULL;
+ return EC_ERR(ENOMEM);
}
builder->address = 0;
@@ -323,15 +328,39 @@ ec_code_chunk_from_space(ec_code_space_t *space)
}
static void *
-ec_code_func_from_chunk(ec_code_chunk_t *chunk)
+ec_code_to_executable(ec_code_space_t *space, void *addr)
+{
+ return (void *)((uintptr_t)addr - (uintptr_t)space +
+ (uintptr_t)space->exec);
+}
+
+static void *
+ec_code_from_executable(ec_code_space_t *space, void *addr)
{
- return (void *)((uintptr_t)chunk + ec_code_chunk_size());
+ return (void *)((uintptr_t)addr - (uintptr_t)space->exec +
+ (uintptr_t)space);
+}
+
+static void *
+ec_code_func_from_chunk(ec_code_chunk_t *chunk, void **exec)
+{
+ void *addr;
+
+ addr = (void *)((uintptr_t)chunk + ec_code_chunk_size());
+
+ *exec = ec_code_to_executable(chunk->space, addr);
+
+ return addr;
}
static ec_code_chunk_t *
ec_code_chunk_from_func(ec_code_func_linear_t func)
{
- return (ec_code_chunk_t *)((uintptr_t)func - ec_code_chunk_size());
+ ec_code_chunk_t *chunk;
+
+ chunk = (ec_code_chunk_t *)((uintptr_t)func - ec_code_chunk_size());
+
+ return ec_code_from_executable(chunk->space, chunk);
}
static ec_code_chunk_t *
@@ -343,6 +372,7 @@ ec_code_chunk_split(ec_code_chunk_t *chunk, size_t size)
avail = chunk->size - size - ec_code_chunk_size();
if (avail > 0) {
extra = (ec_code_chunk_t *)((uintptr_t)chunk + chunk->size - avail);
+ extra->space = chunk->space;
extra->size = avail;
list_add(&extra->list, &chunk->list);
chunk->size = size;
@@ -361,18 +391,117 @@ ec_code_chunk_touch(ec_code_chunk_t *prev, ec_code_chunk_t *next)
return (end == (uintptr_t)next);
}
+static ec_code_space_t *
+ec_code_space_create(ec_code_t *code, size_t size)
+{
+ char path[] = GLUSTERFS_LIBEXECDIR "/ec-code-dynamic.XXXXXX";
+ ec_code_space_t *space;
+ void *exec;
+ int32_t fd, err;
+
+ /* Creates a new code space for 'code' able to hold 'size' bytes
+ * ('size' is rounded up to a multiple of EC_CODE_ALIGN before use).
+ *
+ * On success it returns the address of the writable mapping, with its
+ * 'exec', 'size' and 'code' fields filled in, an empty list of chunks,
+ * and already linked at the tail of code->spaces. On any failure the
+ * problem is logged and the saved errno is returned wrapped with
+ * EC_ERR(), so callers are expected to test the result with
+ * EC_IS_ERR() (as ec_code_space_alloc() does).
+ *
+ * We need to create memory areas to store the generated dynamic code.
+ * Obviously these areas need to be written to be able to create the
+ * code and they also need to be executable to execute it.
+ *
+ * However it's a bad practice to have a memory region that is both
+ * writable *and* executable. In fact, selinux forbids this and causes
+ * attempts to do so to fail (unless specifically configured).
+ *
+ * To solve the problem we'll use two distinct memory areas mapped to
+ * the same physical storage. One of the memory areas will have write
+ * permission, and the other will have execute permission. Both areas
+ * will have the same contents. The physical storage will be a regular
+ * file that will be mmapped to both areas.
+ */
+
+ /* We need to create a temporary file as the backend storage for the
+ * memory mapped areas. mkstemp() replaces the XXXXXX suffix of 'path'
+ * in place, which is why 'path' is a local array and not a pointer to
+ * a string literal. */
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ fd = mkstemp(path);
+ if (fd < 0) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to create a temporary file for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+ goto done;
+ }
+ /* Once created we don't need to keep it in the file system. It will
+ * still exist until we close the last file descriptor or unmap the
+ * memory areas bound to the file. The result of the unlink is not
+ * checked. */
+ sys_unlink(path);
+
+ size = (size + EC_CODE_ALIGN - 1) & ~(EC_CODE_ALIGN - 1);
+ if (sys_ftruncate(fd, size) < 0) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to resize the file for the ec dynamic code");
+ space = EC_ERR(err);
+ goto done_close;
+ }
+
+ /* This creates an executable memory area to be able to run the
+ * generated fragments of code. */
+ exec = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+ if (exec == MAP_FAILED) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to map the executable area for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+ goto done_close;
+ }
+ /* It's not important to check the return value of mlock(). If it fails
+ * everything will continue to work normally. */
+ mlock(exec, size);
+
+ /* This maps a read/write memory area to be able to create the dynamic
+ * code. If it fails, the executable area mapped above is released
+ * before leaving so that nothing stays mapped on error. */
+ space = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (space == MAP_FAILED) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to map the writable area for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+
+ munmap(exec, size);
+
+ goto done_close;
+ }
+
+ space->exec = exec;
+ space->size = size;
+ space->code = code;
+ list_add_tail(&space->list, &code->spaces);
+ INIT_LIST_HEAD(&space->chunks);
+
+done_close:
+ /* If everything has succeeded, we already have the memory areas
+ * mapped. We don't need the file descriptor anymore because the
+ * backend storage will be there until the mmap()'d regions are
+ * unmapped. This label is also reached on the error paths that
+ * happen after the file has been opened. */
+ sys_close(fd);
+done:
+ return space;
+}
+
static void
-ec_code_chunk_merge(ec_code_chunk_t *chunk)
+ec_code_space_destroy(ec_code_space_t *space)
{
- ec_code_chunk_t *item;
+ list_del_init(&space->list);
- list_for_each_entry(item, &chunk->space->chunks, list) {
- if (ec_code_chunk_touch(item, chunk)) {
- item->size += chunk->size + ec_code_chunk_size();
- chunk = item;
+ munmap(space->exec, space->size);
+ munmap(space, space->size);
+}
- goto check;
- }
+static void
+ec_code_chunk_merge(ec_code_chunk_t *chunk)
+{
+ ec_code_chunk_t *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &chunk->space->chunks, list)
+ {
if ((uintptr_t)item > (uintptr_t)chunk) {
list_add_tail(&chunk->list, &item->list);
if (ec_code_chunk_touch(chunk, item)) {
@@ -382,15 +511,18 @@ ec_code_chunk_merge(ec_code_chunk_t *chunk)
goto check;
}
+ if (ec_code_chunk_touch(item, chunk)) {
+ item->size += chunk->size + ec_code_chunk_size();
+ list_del_init(&item->list);
+ chunk = item;
+ }
}
list_add_tail(&chunk->list, &chunk->space->chunks);
check:
- if (chunk->size == EC_CODE_SIZE - ec_code_space_size() -
- ec_code_chunk_size()) {
- list_del_init(&chunk->space->list);
-
- munmap(chunk->space, chunk->space->size);
+ if (chunk->size ==
+ chunk->space->size - ec_code_space_size() - ec_code_chunk_size()) {
+ ec_code_space_destroy(chunk->space);
}
}
@@ -401,35 +533,32 @@ ec_code_space_alloc(ec_code_t *code, size_t size)
ec_code_chunk_t *chunk;
size_t map_size;
- size = (size + 15) & ~15;
- list_for_each_entry(space, &code->spaces, list) {
- list_for_each_entry(chunk, &space->chunks, list) {
+ /* To minimize fragmentation, we only allocate chunks whose total size
+ * (chunk header included) is a multiple of EC_CODE_CHUNK_MIN_SIZE. */
+ size = ((size + ec_code_chunk_size() + EC_CODE_CHUNK_MIN_SIZE - 1) &
+ ~(EC_CODE_CHUNK_MIN_SIZE - 1)) -
+ ec_code_chunk_size();
+ list_for_each_entry(space, &code->spaces, list)
+ {
+ list_for_each_entry(chunk, &space->chunks, list)
+ {
if (chunk->size >= size) {
goto out;
}
}
}
- map_size = EC_CODE_SIZE;
+ map_size = EC_CODE_SIZE - ec_code_space_size() - ec_code_chunk_size();
if (map_size < size) {
map_size = size;
}
- space = mmap(NULL, map_size, PROT_EXEC | PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (space == NULL) {
- return NULL;
+ space = ec_code_space_create(code, map_size);
+ if (EC_IS_ERR(space)) {
+ return (ec_code_chunk_t *)space;
}
- /* It's not important to check the return value of mlock(). If it fails
- * everything will continue to work normally. */
- mlock(space, map_size);
-
- space->code = code;
- space->size = map_size;
- list_add_tail(&space->list, &code->spaces);
- INIT_LIST_HEAD(&space->chunks);
chunk = ec_code_chunk_from_space(space);
- chunk->size = EC_CODE_SIZE - ec_code_space_size() - ec_code_chunk_size();
+ chunk->size = map_size - ec_code_space_size() - ec_code_chunk_size();
list_add(&chunk->list, &space->chunks);
out:
@@ -465,7 +594,7 @@ ec_code_free(ec_code_chunk_t *chunk)
UNLOCK(lock);
}
-static gf_boolean_t
+static int32_t
ec_code_write(ec_code_builder_t *builder)
{
ec_code_gen_t *gen;
@@ -482,31 +611,34 @@ ec_code_write(ec_code_builder_t *builder)
for (i = 0; i < builder->count; i++) {
op = &builder->ops[i];
switch (op->op) {
- case EC_GF_OP_LOAD:
- gen->load(builder, op->arg1.value, op->arg2.value, op->arg3.value);
- break;
- case EC_GF_OP_STORE:
- gen->store(builder, op->arg1.value, op->arg3.value);
- break;
- case EC_GF_OP_COPY:
- gen->copy(builder, op->arg1.value, op->arg2.value);
- break;
- case EC_GF_OP_XOR2:
- gen->xor2(builder, op->arg1.value, op->arg2.value);
- break;
- case EC_GF_OP_XOR3:
- gen->xor3(builder, op->arg1.value, op->arg2.value, op->arg3.value);
- break;
- case EC_GF_OP_XORM:
- gen->xorm(builder, op->arg1.value, op->arg2.value, op->arg3.value);
- break;
- default:
- break;
+ case EC_GF_OP_LOAD:
+ gen->load(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ case EC_GF_OP_STORE:
+ gen->store(builder, op->arg1.value, op->arg3.value);
+ break;
+ case EC_GF_OP_COPY:
+ gen->copy(builder, op->arg1.value, op->arg2.value);
+ break;
+ case EC_GF_OP_XOR2:
+ gen->xor2(builder, op->arg1.value, op->arg2.value);
+ break;
+ case EC_GF_OP_XOR3:
+ gen->xor3(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ case EC_GF_OP_XORM:
+ gen->xorm(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ default:
+ break;
}
}
gen->epilog(builder);
- return builder->error == 0;
+ return builder->error;
}
static void *
@@ -514,22 +646,24 @@ ec_code_compile(ec_code_builder_t *builder)
{
ec_code_chunk_t *chunk;
void *func;
+ int32_t err;
- if (!ec_code_write(builder)) {
- return NULL;
+ err = ec_code_write(builder);
+ if (err != 0) {
+ return EC_ERR(err);
}
chunk = ec_code_alloc(builder->code, builder->size);
- if (chunk == NULL) {
- return NULL;
+ if (EC_IS_ERR(chunk)) {
+ return chunk;
}
- func = ec_code_func_from_chunk(chunk);
- builder->data = (uint8_t *)func;
+ builder->data = ec_code_func_from_chunk(chunk, &func);
- if (!ec_code_write(builder)) {
+ err = ec_code_write(builder);
+ if (err != 0) {
ec_code_free(chunk);
- return NULL;
+ return EC_ERR(err);
}
GF_FREE(builder);
@@ -544,7 +678,7 @@ ec_code_create(ec_gf_t *gf, ec_code_gen_t *gen)
code = GF_MALLOC(sizeof(ec_code_t), ec_mt_ec_code_t);
if (code == NULL) {
- return NULL;
+ return EC_ERR(ENOMEM);
}
memset(code, 0, sizeof(ec_code_t));
INIT_LIST_HEAD(&code->spaces);
@@ -552,11 +686,6 @@ ec_code_create(ec_gf_t *gf, ec_code_gen_t *gen)
code->gf = gf;
code->gen = gen;
- if (gen == NULL) {
- code->width = sizeof(uint64_t);
- } else {
- code->width = gen->width;
- }
return code;
}
@@ -589,25 +718,16 @@ ec_code_value_next(uint32_t *values, uint32_t count, uint32_t *offset)
return next;
}
-void *
-ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values,
- uint32_t count, gf_boolean_t linear)
+static void *
+ec_code_build_dynamic(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count, gf_boolean_t linear)
{
ec_code_builder_t *builder;
uint32_t offset, val, next;
- if (code->gen == NULL) {
- ec_code_c_prepare(code->gf, values, count);
- if (linear) {
- return ec_code_c_linear;
- } else {
- return ec_code_c_interleaved;
- }
- }
-
builder = ec_code_prepare(code, count, width, linear);
- if (builder == NULL) {
- return NULL;
+ if (EC_IS_ERR(builder)) {
+ return builder;
}
offset = -1;
@@ -631,6 +751,38 @@ ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values,
return ec_code_compile(builder);
}
+/* Returns the function that implements the operation described by
+ * 'values' / 'count'.
+ *
+ * When the code object has a dynamic generator (code->gen != NULL), the
+ * arguments are passed unchanged to ec_code_build_dynamic() and its
+ * result is returned if it's not an EC_ERR() value. If the dynamic
+ * generation fails, the generator is permanently disabled for this
+ * code object and the precompiled C implementation is used instead.
+ *
+ * When there is no generator (or after a failure), ec_code_c_prepare()
+ * is called and either ec_code_c_linear or ec_code_c_interleaved is
+ * returned, depending on 'linear'. This function therefore never
+ * returns an error itself. */
+static void *
+ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count,
+              gf_boolean_t linear)
+{
+    void *func;
+
+    if (code->gen != NULL) {
+        func = ec_code_build_dynamic(code, width, values, count, linear);
+        if (!EC_IS_ERR(func)) {
+            return func;
+        }
+
+        /* gf_msg_debug() always logs at debug level. Its second
+         * argument is the errno to attach to the message, not a log
+         * level, so 0 is passed to avoid reporting a bogus error
+         * description. */
+        gf_msg_debug(THIS->name, 0,
+                     "Unable to generate dynamic code. Falling back "
+                     "to precompiled code");
+
+        /* The dynamic code generation shouldn't fail in normal
+         * conditions, but if it fails at some point, it's very
+         * probable that it will fail again, so we completely disable
+         * dynamic code generation. */
+        code->gen = NULL;
+    }
+
+    ec_code_c_prepare(code->gf, values, count);
+
+    if (linear) {
+        return ec_code_c_linear;
+    }
+
+    return ec_code_c_interleaved;
+}
+
ec_code_func_linear_t
ec_code_build_linear(ec_code_t *code, uint32_t width, uint32_t *values,
uint32_t count)
@@ -643,14 +795,15 @@ ec_code_func_interleaved_t
ec_code_build_interleaved(ec_code_t *code, uint32_t width, uint32_t *values,
uint32_t count)
{
- return (ec_code_func_interleaved_t)ec_code_build(code, width, values,
- count, _gf_false);
+ return (ec_code_func_interleaved_t)ec_code_build(code, width, values, count,
+ _gf_false);
}
void
ec_code_release(ec_code_t *code, ec_code_func_t *func)
{
- if (code->gen != NULL) {
+ if ((func->linear != ec_code_c_linear) &&
+ (func->interleaved != ec_code_c_interleaved)) {
ec_code_free(ec_code_chunk_from_func(func->linear));
}
}
@@ -659,6 +812,8 @@ void
ec_code_error(ec_code_builder_t *builder, int32_t error)
{
if (builder->error == 0) {
+ gf_msg(THIS->name, GF_LOG_ERROR, error, EC_MSG_DYN_CODEGEN_FAILED,
+ "Failed to generate dynamic code");
builder->error = error;
}
}
@@ -789,7 +944,7 @@ ec_code_cpu_check(uint32_t idx, char *list, uint32_t count)
{
ec_code_gen_t *gen;
char **ptr;
- char *table[count];
+ char *table[count + 1];
uint32_t i;
for (i = 0; i < count; i++) {
@@ -834,7 +989,7 @@ ec_code_detect(xlator_t *xl, const char *def)
return NULL;
}
- file.fd = sys_openat(AT_FDCWD, PROC_CPUINFO, O_RDONLY);
+ file.fd = sys_open(PROC_CPUINFO, O_RDONLY, 0);
if (file.fd < 0) {
goto out;
}
@@ -852,7 +1007,8 @@ ec_code_detect(xlator_t *xl, const char *def)
if (ec_code_gen_table[select] == NULL) {
gf_msg(xl->name, GF_LOG_WARNING, EINVAL, EC_MSG_EXTENSION_UNKNOWN,
"CPU extension '%s' is not known. Not using any cpu "
- "extensions", def);
+ "extensions",
+ def);
return NULL;
}
@@ -882,7 +1038,7 @@ ec_code_detect(xlator_t *xl, const char *def)
if (file.error) {
gf_msg(xl->name, GF_LOG_WARNING, 0, EC_MSG_EXTENSION_FAILED,
- "Unable to detemine supported CPU extensions. Not using any "
+ "Unable to determine supported CPU extensions. Not using any "
"cpu extensions");
gen = NULL;