summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-combine.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src/ec-combine.c')
-rw-r--r--xlators/cluster/ec/src/ec-combine.c787
1 files changed, 787 insertions, 0 deletions
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
new file mode 100644
index 00000000000..07d819a9a3d
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -0,0 +1,787 @@
+/*
+ Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es>
+
+ This file is part of the cluster/ec translator for GlusterFS.
+
+ The cluster/ec translator for GlusterFS is free software: you can
+ redistribute it and/or modify it under the terms of the GNU General
+ Public License as published by the Free Software Foundation, either
+ version 3 of the License, or (at your option) any later version.
+
+ The cluster/ec translator for GlusterFS is distributed in the hope
+ that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with the cluster/ec translator for GlusterFS. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <fnmatch.h>
+
+#include "libxlator.h"
+
+#include "ec-data.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+
+struct _ec_dict_info;
+typedef struct _ec_dict_info ec_dict_info_t;
+
+struct _ec_dict_combine;
+typedef struct _ec_dict_combine ec_dict_combine_t;
+
+struct _ec_dict_info
+{
+ dict_t * dict;
+ int32_t count;
+};
+
+struct _ec_dict_combine
+{
+ ec_cbk_data_t * cbk;
+ int32_t which;
+};
+
+void ec_iatt_time_merge(uint32_t * dst_sec, uint32_t * dst_nsec,
+ uint32_t src_sec, uint32_t src_nsec)
+{
+ if ((*dst_sec < src_sec) ||
+ ((*dst_sec == src_sec) && (*dst_nsec < src_nsec)))
+ {
+ *dst_sec = src_sec;
+ *dst_nsec = src_nsec;
+ }
+}
+
+int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count)
+{
+ int32_t i;
+
+ for (i = 0; i < count; i++)
+ {
+ if ((dst->ia_ino != src->ia_ino) ||
+ (dst->ia_uid != src->ia_uid) ||
+ (dst->ia_gid != src->ia_gid) ||
+ (((dst->ia_type == IA_IFBLK) || (dst->ia_type == IA_IFCHR)) &&
+ (dst->ia_rdev != src->ia_rdev)) ||
+ ((dst->ia_type == IA_IFREG) && (dst->ia_size != src->ia_size)) ||
+ (st_mode_from_ia(dst->ia_prot, dst->ia_type) !=
+ st_mode_from_ia(src->ia_prot, src->ia_type)) ||
+ (uuid_compare(dst->ia_gfid, src->ia_gfid) != 0))
+ {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "Failed to combine iatt (inode: %lu-%lu, links: %u-%u, "
+ "uid: %u-%u, gid: %u-%u, rdev: %lu-%lu, size: %lu-%lu, "
+ "mode: %o-%o)",
+ dst->ia_ino, src->ia_ino, dst->ia_nlink, src->ia_nlink,
+ dst->ia_uid, src->ia_uid, dst->ia_gid, src->ia_gid,
+ dst->ia_rdev, src->ia_rdev, dst->ia_size, src->ia_size,
+ st_mode_from_ia(dst->ia_prot, dst->ia_type),
+ st_mode_from_ia(src->ia_prot, dst->ia_type));
+
+ return 0;
+ }
+ }
+
+ while (count-- > 0)
+ {
+ dst->ia_blocks += src->ia_blocks;
+ if (dst->ia_blksize < src->ia_blksize)
+ {
+ dst->ia_blksize = src->ia_blksize;
+ }
+
+ ec_iatt_time_merge(&dst->ia_atime, &dst->ia_atime_nsec, src->ia_atime,
+ src->ia_atime_nsec);
+ ec_iatt_time_merge(&dst->ia_mtime, &dst->ia_mtime_nsec, src->ia_mtime,
+ src->ia_mtime_nsec);
+ ec_iatt_time_merge(&dst->ia_ctime, &dst->ia_ctime_nsec, src->ia_ctime,
+ src->ia_ctime_nsec);
+ }
+
+ return 1;
+}
+
+void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
+ int32_t answers)
+{
+ size_t blocks;
+
+ while (count-- > 0)
+ {
+ blocks = iatt[count].ia_blocks * ec->fragments + answers - 1;
+ blocks /= answers;
+ iatt[count].ia_blocks = blocks;
+ }
+}
+
+int32_t ec_dict_data_compare(dict_t * dict, char * key, data_t * value,
+ void * arg)
+{
+ ec_dict_info_t * info = arg;
+ data_t * data;
+
+ data = dict_get(info->dict, key);
+ if (data == NULL)
+ {
+ gf_log("ec", GF_LOG_DEBUG, "key '%s' found only on one dict", key);
+
+ return -1;
+ }
+
+ info->count--;
+
+ if ((strcmp(key, GF_CONTENT_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_LOCKINFO_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_CLRLK_CMD) == 0) ||
+ (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) ||
+ (fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) ||
+ (XATTR_IS_NODE_UUID(key)))
+ {
+ return 0;
+ }
+
+ if ((data->len != value->len) ||
+ (memcmp(data->data, value->data, data->len) != 0))
+ {
+ gf_log("ec", GF_LOG_DEBUG, "key '%s' is different (size: %u, %u)",
+ key, data->len, value->len);
+
+ return -1;
+ }
+
+ return 0;
+}
+
+int32_t ec_dict_data_show(dict_t * dict, char * key, data_t * value,
+ void * arg)
+{
+ if (dict_get(arg, key) == NULL)
+ {
+ gf_log("ec", GF_LOG_DEBUG, "key '%s' found only on one dict", key);
+ }
+
+ return 0;
+}
+
+int32_t ec_dict_compare(dict_t * dict1, dict_t * dict2)
+{
+ ec_dict_info_t info;
+ dict_t * dict;
+
+ if (dict1 != NULL)
+ {
+ info.dict = dict1;
+ info.count = dict1->count;
+ dict = dict2;
+ }
+ else if (dict2 != NULL)
+ {
+ info.dict = dict2;
+ info.count = dict2->count;
+ dict = dict1;
+ }
+ else
+ {
+ return 1;
+ }
+
+ if (dict != NULL)
+ {
+ if (dict_foreach(dict, ec_dict_data_compare, &info) != 0)
+ {
+ return 0;
+ }
+ }
+
+ if (info.count != 0)
+ {
+ dict_foreach(info.dict, ec_dict_data_show, dict);
+ }
+
+ return (info.count == 0);
+}
+
+int32_t ec_dict_list(data_t ** list, int32_t * count, ec_cbk_data_t * cbk,
+ int32_t which, char * key)
+{
+ ec_cbk_data_t * ans;
+ dict_t * dict;
+ int32_t i, max;
+
+ max = *count;
+ i = 0;
+ for (ans = cbk; ans != NULL; ans = ans->next)
+ {
+ if (i >= max)
+ {
+ gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Unexpected number of "
+ "dictionaries");
+
+ return 0;
+ }
+
+ dict = (which == EC_COMBINE_XDATA) ? ans->xdata : ans->dict;
+ list[i] = dict_get(dict, key);
+ if (list[i] == NULL)
+ {
+ gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Unexpected missing "
+ "dictionary entry");
+
+ return 0;
+ }
+
+ i++;
+ }
+
+ *count = i;
+
+ return 1;
+}
+
+char * ec_concat_prepare(xlator_t * xl, char ** sep, char ** post,
+ const char * fmt, va_list args)
+{
+ char * str, * tmp;
+ int32_t len;
+
+ len = gf_vasprintf(&str, fmt, args);
+ if (len < 0)
+ {
+ return NULL;
+ }
+
+ tmp = strchr(str, '{');
+ if (tmp == NULL)
+ {
+ goto out;
+ }
+ *tmp++ = 0;
+ *sep = tmp;
+ tmp = strchr(tmp, '}');
+ if (tmp == NULL)
+ {
+ goto out;
+ }
+ *tmp++ = 0;
+ *post = tmp;
+
+ return str;
+
+out:
+ gf_log(xl->name, GF_LOG_ERROR, "Invalid concat format");
+
+ GF_FREE(str);
+
+ return NULL;
+}
+
+int32_t ec_dict_data_concat(const char * fmt, ec_cbk_data_t * cbk,
+ int32_t which, char * key, ...)
+{
+ data_t * data[cbk->count];
+ size_t len, tmp;
+ char * str = NULL, * pre = NULL, * sep, * post;
+ dict_t * dict;
+ va_list args;
+ int32_t i, num, prelen, postlen, seplen;
+ int32_t ret = -1;
+
+ num = cbk->count;
+ if (!ec_dict_list(data, &num, cbk, which, key))
+ {
+ return -1;
+ }
+
+ va_start(args, key);
+ pre = ec_concat_prepare(cbk->fop->xl, &sep, &post, fmt, args);
+ va_end(args);
+
+ if (pre == NULL)
+ {
+ return -1;
+ }
+
+ prelen = strlen(pre);
+ seplen = strlen(sep);
+ postlen = strlen(post);
+
+ len = prelen + (num - 1) * seplen + postlen + 1;
+ for (i = 0; i < num; i++)
+ {
+ len += data[i]->len - 1;
+ }
+
+ str = GF_MALLOC(len, gf_common_mt_char);
+ if (str == NULL)
+ {
+ goto out;
+ }
+
+ memcpy(str, pre, prelen);
+ len = prelen;
+ for (i = 0; i < num; i++)
+ {
+ memcpy(str + len, sep, seplen);
+ len += seplen;
+ tmp = data[i]->len - 1;
+ memcpy(str + len, data[i]->data, tmp);
+ len += tmp;
+ }
+ memcpy(str + len, post, postlen + 1);
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ if (dict_set_dynstr(dict, key, str) != 0)
+ {
+ goto out;
+ }
+
+ str = NULL;
+
+ ret = 0;
+
+out:
+ GF_FREE(str);
+ GF_FREE(pre);
+
+ return ret;
+}
+
+int32_t ec_dict_data_merge(ec_cbk_data_t * cbk, int32_t which, char * key)
+{
+ data_t * data[cbk->count];
+ dict_t * dict, * lockinfo, * tmp;
+ char * ptr = NULL;
+ int32_t i, num, len;
+ int32_t ret = -1;
+
+ num = cbk->count;
+ if (!ec_dict_list(data, &num, cbk, which, key))
+ {
+ return -1;
+ }
+
+ if (dict_unserialize(data[0]->data, data[0]->len, &lockinfo) != 0)
+ {
+ return -1;
+ }
+
+ for (i = 1; i < num; i++)
+ {
+ if (dict_unserialize(data[i]->data, data[i]->len, &tmp) != 0)
+ {
+ goto out;
+ }
+ if (dict_copy(tmp, lockinfo) == NULL)
+ {
+ dict_unref(tmp);
+
+ goto out;
+ }
+
+ dict_unref(tmp);
+ }
+
+ len = dict_serialized_length(lockinfo);
+ if (len < 0)
+ {
+ goto out;
+ }
+ ptr = GF_MALLOC(len, gf_common_mt_char);
+ if (ptr == NULL)
+ {
+ goto out;
+ }
+ if (dict_serialize(lockinfo, ptr) != 0)
+ {
+ goto out;
+ }
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ if (dict_set_dynptr(dict, key, ptr, len) != 0)
+ {
+ goto out;
+ }
+
+ ptr = NULL;
+
+ ret = 0;
+
+out:
+ GF_FREE(ptr);
+ dict_unref(lockinfo);
+
+ return ret;
+}
+
+int32_t ec_dict_data_uuid(ec_cbk_data_t * cbk, int32_t which, char * key)
+{
+ ec_cbk_data_t * ans, * min;
+ dict_t * src, * dst;
+ data_t * data;
+
+ min = cbk;
+ for (ans = cbk->next; ans != NULL; ans = ans->next)
+ {
+ if (ans->idx < min->idx)
+ {
+ min = ans;
+ }
+ }
+
+ if (min != cbk)
+ {
+ src = (which == EC_COMBINE_XDATA) ? min->xdata : min->dict;
+ dst = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+
+ data = dict_get(src, key);
+ if (data == NULL)
+ {
+ return -1;
+ }
+ if (dict_set(dst, key, data) != 0)
+ {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int32_t ec_dict_data_max(ec_cbk_data_t * cbk, int32_t which, char * key)
+{
+ data_t * data[cbk->count];
+ dict_t * dict;
+ int32_t i, num;
+ uint32_t max, tmp;
+
+ num = cbk->count;
+ if (!ec_dict_list(data, &num, cbk, which, key))
+ {
+ return -1;
+ }
+
+ if (num <= 1)
+ {
+ return 0;
+ }
+
+ max = data_to_uint32(data[0]);
+ for (i = 1; i < num; i++)
+ {
+ tmp = data_to_uint32(data[i]);
+ if (max < tmp)
+ {
+ max = tmp;
+ }
+ }
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ if (dict_set_uint32(dict, key, max) != 0)
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+int32_t ec_dict_data_stime(ec_cbk_data_t * cbk, int32_t which, char * key)
+{
+ data_t * data[cbk->count];
+ dict_t * dict;
+ int32_t i, num;
+
+ num = cbk->count;
+ if (!ec_dict_list(data, &num, cbk, which, key))
+ {
+ return -1;
+ }
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ for (i = 1; i < num; i++)
+ {
+ if (gf_get_max_stime(cbk->fop->xl, dict, key, data[i]) != 0)
+ {
+ gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "STIME combination "
+ "failed");
+
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value,
+ void * arg)
+{
+ ec_dict_combine_t * data = arg;
+
+ if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0))
+ {
+ return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which,
+ key, data->cbk->fop->xl->name);
+ }
+
+ if (strncmp(key, GF_XATTR_CLRLK_CMD, strlen(GF_XATTR_CLRLK_CMD)) == 0)
+ {
+ return ec_dict_data_concat("{\n}", data->cbk, data->which, key);
+ }
+
+ if (strncmp(key, GF_XATTR_LOCKINFO_KEY,
+ strlen(GF_XATTR_LOCKINFO_KEY)) == 0)
+ {
+ return ec_dict_data_merge(data->cbk, data->which, key);
+ }
+
+ if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)
+ {
+ return ec_dict_data_max(data->cbk, data->which, key);
+ }
+
+ if (XATTR_IS_NODE_UUID(key))
+ {
+ return ec_dict_data_uuid(data->cbk, data->which, key);
+ }
+
+ if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ {
+ return ec_dict_data_stime(data->cbk, data->which, key);
+ }
+
+ return 0;
+}
+
+int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which)
+{
+ dict_t * dict;
+ ec_dict_combine_t data;
+
+ data.cbk = cbk;
+ data.which = which;
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ if ((dict != NULL) &&
+ (dict_foreach(dict, ec_dict_data_combine, &data) != 0))
+ {
+ gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Dictionary combination "
+ "failed");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
+ struct iovec * src_vector, int32_t src_count)
+{
+ size_t dst_size = 0, src_size = 0;
+
+ if (dst_count > 0)
+ {
+ dst_size = iov_length(dst_vector, dst_count);
+ }
+ if (src_count > 0)
+ {
+ src_size = iov_length(src_vector, src_count);
+ }
+
+ return (dst_size == src_size);
+}
+
+int32_t ec_flock_compare(struct gf_flock * dst, struct gf_flock * src)
+{
+ if ((dst->l_type != src->l_type) ||
+ (dst->l_whence != src->l_whence) ||
+ (dst->l_start != src->l_start) ||
+ (dst->l_len != src->l_len) ||
+ (dst->l_pid != src->l_pid) ||
+ !is_same_lkowner(&dst->l_owner, &src->l_owner))
+ {
+ return 0;
+ }
+
+ return 1;
+}
+
+void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src)
+{
+ if (dst->f_bsize < src->f_bsize)
+ {
+ dst->f_bsize = src->f_bsize;
+ }
+
+ if (dst->f_frsize < src->f_frsize)
+ {
+ dst->f_blocks *= dst->f_frsize;
+ dst->f_blocks /= src->f_frsize;
+
+ dst->f_bfree *= dst->f_frsize;
+ dst->f_bfree /= src->f_frsize;
+
+ dst->f_bavail *= dst->f_frsize;
+ dst->f_bavail /= src->f_frsize;
+
+ dst->f_frsize = src->f_frsize;
+ }
+ else if (dst->f_frsize > src->f_frsize)
+ {
+ src->f_blocks *= src->f_frsize;
+ src->f_blocks /= dst->f_frsize;
+
+ src->f_bfree *= src->f_frsize;
+ src->f_bfree /= dst->f_frsize;
+
+ src->f_bavail *= src->f_frsize;
+ src->f_bavail /= dst->f_frsize;
+ }
+ if (dst->f_blocks > src->f_blocks)
+ {
+ dst->f_blocks = src->f_blocks;
+ }
+ if (dst->f_bfree > src->f_bfree)
+ {
+ dst->f_bfree = src->f_bfree;
+ }
+ if (dst->f_bavail > src->f_bavail)
+ {
+ dst->f_bavail = src->f_bavail;
+ }
+
+ if (dst->f_files < src->f_files)
+ {
+ dst->f_files = src->f_files;
+ }
+ if (dst->f_ffree > src->f_ffree)
+ {
+ dst->f_ffree = src->f_ffree;
+ }
+ if (dst->f_favail > src->f_favail)
+ {
+ dst->f_favail = src->f_favail;
+ }
+ if (dst->f_namemax > src->f_namemax)
+ {
+ dst->f_namemax = src->f_namemax;
+ }
+
+ if (dst->f_flag != src->f_flag)
+ {
+ gf_log(THIS->name, GF_LOG_DEBUG, "Mismatching file system flags "
+ "(%lX, %lX)",
+ dst->f_flag, src->f_flag);
+ }
+ dst->f_flag &= src->f_flag;
+}
+
+int32_t ec_combine_check(ec_cbk_data_t * dst, ec_cbk_data_t * src,
+ ec_combine_f combine)
+{
+ ec_fop_data_t * fop = dst->fop;
+
+ if (dst->op_ret != src->op_ret)
+ {
+ gf_log(fop->xl->name, GF_LOG_DEBUG, "Mismatching return code in "
+ "answers of '%s': %d <-> %d",
+ ec_fop_name(fop->id), dst->op_ret, src->op_ret);
+
+ return 0;
+ }
+ if (dst->op_ret < 0)
+ {
+ if (dst->op_errno != src->op_errno)
+ {
+ gf_log(fop->xl->name, GF_LOG_DEBUG, "Mismatching errno code in "
+ "answers of '%s': %d <-> %d",
+ ec_fop_name(fop->id), dst->op_errno, src->op_errno);
+
+ return 0;
+ }
+ }
+
+ if (!ec_dict_compare(dst->xdata, src->xdata))
+ {
+ gf_log(fop->xl->name, GF_LOG_WARNING, "Mismatching xdata in answers "
+ "of '%s'",
+ ec_fop_name(fop->id));
+
+ return 0;
+ }
+
+ if ((dst->op_ret >= 0) && (combine != NULL))
+ {
+ return combine(fop, dst, src);
+ }
+
+ return 1;
+}
+
+void ec_combine(ec_cbk_data_t * cbk, ec_combine_f combine)
+{
+ ec_fop_data_t * fop = cbk->fop;
+ ec_cbk_data_t * ans = NULL, * tmp = NULL;
+ struct list_head * item = NULL;
+ int32_t needed = 0, report = 0;
+ char str[32];
+
+ LOCK(&fop->lock);
+
+ item = fop->cbk_list.prev;
+ list_for_each_entry(ans, &fop->cbk_list, list)
+ {
+ if (ec_combine_check(cbk, ans, combine))
+ {
+ cbk->count += ans->count;
+ cbk->mask |= ans->mask;
+
+ item = ans->list.prev;
+ while (item != &fop->cbk_list)
+ {
+ tmp = list_entry(item, ec_cbk_data_t, list);
+ if (tmp->count >= cbk->count)
+ {
+ break;
+ }
+ item = item->prev;
+ }
+ list_del(&ans->list);
+
+ cbk->next = ans;
+
+ break;
+ }
+ }
+ list_add(&cbk->list, item);
+
+ ec_trace("ANSWER", fop, "combine=%s[%d]",
+ ec_bin(str, sizeof(str), cbk->mask, 0), cbk->count);
+
+ if ((cbk->count == fop->expected) && (fop->answer == NULL))
+ {
+ fop->answer = cbk;
+
+ ec_update_bad(fop, cbk->mask);
+
+ report = 1;
+ }
+
+ ans = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
+ needed = fop->minimum - ans->count - fop->winds + 1;
+
+ UNLOCK(&fop->lock);
+
+ if (needed > 0)
+ {
+ ec_dispatch_next(fop, cbk->idx);
+ }
+ else if (report)
+ {
+ ec_report(fop, 0);
+ }
+}