diff options
Diffstat (limited to 'xlators/lib/src')
| -rw-r--r-- | xlators/lib/src/libxlator.c | 490 | ||||
| -rw-r--r-- | xlators/lib/src/libxlator.h | 147 |
2 files changed, 637 insertions, 0 deletions
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c new file mode 100644 index 00000000000..8075fa0c29f --- /dev/null +++ b/xlators/lib/src/libxlator.c @@ -0,0 +1,490 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "libxlator.h" + +int marker_xtime_default_gauge[] = { + [MCNT_FOUND] = 1, [MCNT_NOTFOUND] = -1, [MCNT_ENODATA] = -1, + [MCNT_ENOTCONN] = -1, [MCNT_ENOENT] = -1, [MCNT_EOTHER] = -1, +}; + +int marker_uuid_default_gauge[] = { + [MCNT_FOUND] = 1, [MCNT_NOTFOUND] = 0, [MCNT_ENODATA] = 0, + [MCNT_ENOTCONN] = 0, [MCNT_ENOENT] = 0, [MCNT_EOTHER] = 0, +}; + +static int marker_idx_errno_map[] = { + [MCNT_FOUND] = EINVAL, [MCNT_NOTFOUND] = EINVAL, + [MCNT_ENOENT] = ENOENT, [MCNT_ENOTCONN] = ENOTCONN, + [MCNT_ENODATA] = ENODATA, [MCNT_EOTHER] = EINVAL, + [MCNT_MAX] = 0, +}; + +/*Copy the contents of oldtimebuf to newtimbuf*/ +static void +update_timebuf(uint32_t *oldtimbuf, uint32_t *newtimebuf) +{ + newtimebuf[0] = (oldtimbuf[0]); + newtimebuf[1] = (oldtimbuf[1]); +} + +/* Convert Timebuf in network order to host order */ +static void +get_hosttime(uint32_t *oldtimbuf, uint32_t *newtimebuf) +{ + newtimebuf[0] = ntohl(oldtimbuf[0]); + newtimebuf[1] = ntohl(oldtimbuf[1]); +} + +/* Match the Incoming trusted.glusterfs.<uuid>.xtime against volume uuid */ +int +match_uuid_local(const char *name, char *uuid) +{ + if (!uuid || !*uuid) + return -1; + + name = strtail((char *)name, MARKER_XATTR_PREFIX); + if (!name || name++ [0] != '.') + return -1; + + name = strtail((char *)name, uuid); + if (!name || strcmp(name, ".xtime") != 0) + return -1; + + return 0; +} + +static void +marker_local_incr_errcount(xl_marker_local_t *local, int op_errno) +{ + marker_result_idx_t i = -1; + + if (!local) + return; + + switch (op_errno) { + case ENODATA: + i = MCNT_ENODATA; + break; + case ENOENT: + i = MCNT_ENOENT; + break; + case ENOTCONN: + i = MCNT_ENOTCONN; + break; + default: + i = MCNT_EOTHER; + break; + } + + local->count[i]++; +} + +static int +evaluate_marker_results(int *gauge, int *count) +{ + int i = 0; + int op_errno = 0; + gf_boolean_t sane = _gf_true; + + /* check if the policy of the gauge is violated; + * if yes, try to get the best errno, ie. look + * for the first position where there is a more + * specific kind of vioilation than the generic EINVAL + */ + for (i = 0; i < MCNT_MAX; i++) { + if (sane) { + if ((gauge[i] > 0 && count[i] < gauge[i]) || + (gauge[i] < 0 && count[i] >= -gauge[i])) { + sane = _gf_false; + /* generic action: adopt corresponding errno */ + op_errno = marker_idx_errno_map[i]; + } + } else { + /* already insane; trying to get a more informative + * errno by checking subsequent counters + */ + if (count[i] > 0) + op_errno = marker_idx_errno_map[i]; + } + if (op_errno && op_errno != EINVAL) + break; + } + + return op_errno; +} + +static void +cluster_marker_unwind(call_frame_t *frame, char *key, void *value, size_t size, + dict_t *dict) +{ + xl_marker_local_t *local = frame->local; + int ret = 0; + int32_t op_ret = 0; + int32_t op_errno = 0; + gf_boolean_t unref = _gf_false; + + frame->local = local->xl_local; + + if (local->count[MCNT_FOUND]) { + if (!dict) { + dict = dict_new(); + if (dict) { + unref = _gf_true; + } else { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + } + + ret = dict_set_static_bin(dict, key, value, size); + if (ret) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + } + + op_errno = evaluate_marker_results(local->gauge, local->count); + if (op_errno) + op_ret = -1; + +out: + if (local->xl_specf_unwind) { + local->xl_specf_unwind(frame, op_ret, op_errno, dict, NULL); + } else { + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, NULL); + } + + GF_FREE(local); + if (unref) + dict_unref(dict); +} + +/* Aggregate all the <volid>.xtime attrs of the cluster and send the max*/ +int32_t +cluster_markerxtime_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata) + +{ + int32_t callcnt = 0; + uint32_t *net_timebuf = NULL; + uint32_t host_timebuf[2] = { + 0, + }; + char marker_xattr[128] = {0}; + xl_marker_local_t *local = NULL; + + local = frame->local; + + snprintf(marker_xattr, sizeof(marker_xattr), "%s.%s.%s", + MARKER_XATTR_PREFIX, local->vol_uuid, XTIME); + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret) { + marker_local_incr_errcount(local, op_errno); + goto unlock; + } + + if (dict_get_ptr(dict, marker_xattr, (void **)&net_timebuf)) { + local->count[MCNT_NOTFOUND]++; + UNLOCK(&frame->lock); + gf_log(this->name, GF_LOG_WARNING, + "Unable to get <uuid>.xtime attr"); + goto post_unlock; + } + + if (local->count[MCNT_FOUND]) { + get_hosttime(net_timebuf, host_timebuf); + if ((host_timebuf[0] > local->host_timebuf[0]) || + (host_timebuf[0] == local->host_timebuf[0] && + host_timebuf[1] >= local->host_timebuf[1])) { + update_timebuf(net_timebuf, local->net_timebuf); + update_timebuf(host_timebuf, local->host_timebuf); + } + + } else { + get_hosttime(net_timebuf, local->host_timebuf); + update_timebuf(net_timebuf, local->net_timebuf); + local->count[MCNT_FOUND]++; + } + } +unlock: + UNLOCK(&frame->lock); +post_unlock: + if (callcnt == 0) + cluster_marker_unwind(frame, marker_xattr, local->net_timebuf, 8, dict); + + return 0; +} + +int32_t +cluster_markeruuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata) +{ + int32_t callcnt = 0; + struct volume_mark *volmark = NULL; + xl_marker_local_t *local = NULL; + int32_t ret = -1; + char *vol_uuid = NULL; + + local = frame->local; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + vol_uuid = local->vol_uuid; + + if (op_ret) { + marker_local_incr_errcount(local, op_errno); + goto unlock; + } + + ret = dict_get_bin(dict, GF_XATTR_MARKER_KEY, (void *)&volmark); + if (ret) + goto unlock; + + if (local->count[MCNT_FOUND]) { + if ((local->volmark->major != volmark->major) || + (local->volmark->minor != volmark->minor)) { + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } + + if (local->retval) { + goto unlock; + } else if (volmark->retval) { + GF_FREE(local->volmark); + local->volmark = gf_memdup(volmark, sizeof(*volmark)); + local->retval = volmark->retval; + } else if ((volmark->sec > local->volmark->sec) || + ((volmark->sec == local->volmark->sec) && + (volmark->usec >= local->volmark->usec))) { + GF_FREE(local->volmark); + local->volmark = gf_memdup(volmark, sizeof(*volmark)); + } + + } else { + local->volmark = gf_memdup(volmark, sizeof(*volmark)); + VALIDATE_OR_GOTO(local->volmark, unlock); + gf_uuid_unparse(volmark->uuid, vol_uuid); + if (volmark->retval) + local->retval = volmark->retval; + local->count[MCNT_FOUND]++; + } + } +unlock: + UNLOCK(&frame->lock); + + if (callcnt == 0) + cluster_marker_unwind(frame, GF_XATTR_MARKER_KEY, local->volmark, + sizeof(*local->volmark), dict); + + return 0; +} + +int +gf_get_min_stime(xlator_t *this, dict_t *dst, char *key, data_t *value) +{ + int ret = -1; + uint32_t *net_timebuf = NULL; + uint32_t *value_timebuf = NULL; + uint32_t host_timebuf[2] = { + 0, + }; + uint32_t host_value_timebuf[2] = { + 0, + }; + + /* stime should be minimum of all the other nodes */ + ret = dict_get_bin(dst, key, (void **)&net_timebuf); + if (ret < 0) { + net_timebuf = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char); + if (!net_timebuf) + goto out; + + ret = dict_set_bin(dst, key, net_timebuf, sizeof(int64_t)); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "key=%s: dict set failed", key); + goto error; + } + } + + value_timebuf = data_to_bin(value); + if (!value_timebuf) { + gf_log(this->name, GF_LOG_WARNING, + "key=%s: getting value of stime failed", key); + ret = -1; + goto out; + } + + get_hosttime(value_timebuf, host_value_timebuf); + get_hosttime(net_timebuf, host_timebuf); + + /* can't use 'min()' macro here as we need to compare two fields + in the array, selectively */ + if ((host_value_timebuf[0] < host_timebuf[0]) || + ((host_value_timebuf[0] == host_timebuf[0]) && + (host_value_timebuf[1] < host_timebuf[1]))) { + update_timebuf(value_timebuf, net_timebuf); + } + + ret = 0; +out: + return ret; +error: + /* To be used only when net_timebuf is not set in the dict */ + if (net_timebuf) + GF_FREE(net_timebuf); + + return ret; +} + +int +gf_get_max_stime(xlator_t *this, dict_t *dst, char *key, data_t *value) +{ + int ret = -ENOMEM; + uint32_t *net_timebuf = NULL; + uint32_t *value_timebuf = NULL; + uint32_t host_timebuf[2] = { + 0, + }; + uint32_t host_value_timebuf[2] = { + 0, + }; + + /* stime should be maximum of all the other nodes */ + ret = dict_get_bin(dst, key, (void **)&net_timebuf); + if (ret < 0) { + net_timebuf = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char); + if (!net_timebuf) + goto out; + + ret = dict_set_bin(dst, key, net_timebuf, sizeof(int64_t)); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "key=%s: dict set failed", key); + goto error; + } + } + + value_timebuf = data_to_bin(value); + if (!value_timebuf) { + gf_log(this->name, GF_LOG_WARNING, + "key=%s: getting value of stime failed", key); + ret = -EINVAL; + goto out; + } + + get_hosttime(value_timebuf, host_value_timebuf); + get_hosttime(net_timebuf, host_timebuf); + + /* can't use 'max()' macro here as we need to compare two fields + in the array, selectively */ + if ((host_value_timebuf[0] > host_timebuf[0]) || + ((host_value_timebuf[0] == host_timebuf[0]) && + (host_value_timebuf[1] > host_timebuf[1]))) { + update_timebuf(value_timebuf, net_timebuf); + } + + ret = 0; +out: + return ret; +error: + /* To be used only when net_timebuf is not set in the dict */ + if (net_timebuf) + GF_FREE(net_timebuf); + + return ret; +} + +static int +_get_children_count(xlator_t *xl) +{ + int i = 0; + xlator_list_t *trav = NULL; + for (i = 0, trav = xl->children; trav; trav = trav->next, i++) { + /*'i' will have the value */ + } + + return i; +} + +int +cluster_handle_marker_getxattr(call_frame_t *frame, loc_t *loc, + const char *name, char *vol_uuid, + xlator_specf_unwind_t unwind, + int (*populate_args)(call_frame_t *frame, + int type, int *gauge, + xlator_t **subvols)) +{ + xlator_t *this = frame->this; + xlator_t **subvols = NULL; + int num_subvols = 0; + int type = 0; + int i = 0; + int gauge[MCNT_MAX] = {0}; + xl_marker_local_t *local = NULL; + + if (GF_CLIENT_PID_GSYNCD != frame->root->pid) + return -EINVAL; + + if (name == NULL) + return -EINVAL; + + if (strcmp(GF_XATTR_MARKER_KEY, name) == 0) { + type = MARKER_UUID_TYPE; + memcpy(gauge, marker_uuid_default_gauge, sizeof(gauge)); + } else if (match_uuid_local(name, vol_uuid) == 0) { + type = MARKER_XTIME_TYPE; + memcpy(gauge, marker_xtime_default_gauge, sizeof(gauge)); + } else { + return -EINVAL; + } + + num_subvols = _get_children_count(this); + subvols = alloca(num_subvols * sizeof(*subvols)); + num_subvols = populate_args(frame, type, gauge, subvols); + + local = GF_CALLOC(sizeof(struct marker_str), 1, + gf_common_mt_libxl_marker_local); + + if (!local) + goto fail; + + local->xl_local = frame->local; + local->call_count = num_subvols; + local->xl_specf_unwind = unwind; + local->vol_uuid = vol_uuid; + memcpy(local->gauge, gauge, sizeof(local->gauge)); + + frame->local = local; + + for (i = 0; i < num_subvols; i++) { + if (MARKER_UUID_TYPE == type) + STACK_WIND(frame, cluster_markeruuid_cbk, subvols[i], + subvols[i]->fops->getxattr, loc, name, NULL); + else if (MARKER_XTIME_TYPE == type) + STACK_WIND(frame, cluster_markerxtime_cbk, subvols[i], + subvols[i]->fops->getxattr, loc, name, NULL); + } + + return 0; +fail: + if (unwind) + unwind(frame, -1, ENOMEM, NULL, NULL); + else + default_getxattr_failure_cbk(frame, ENOMEM); + return 0; +} diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h new file mode 100644 index 00000000000..81da4060d55 --- /dev/null +++ b/xlators/lib/src/libxlator.h @@ -0,0 +1,147 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _LIBXLATOR_H +#define _LIBXLATOR_H + +#include <glusterfs/defaults.h> + +#include <stdint.h> // for int32_t +#include "glusterfs/dict.h" // for dict_t, data_t +#include "glusterfs/globals.h" // for xlator_t, loc_t +#include "glusterfs/stack.h" // for call_frame_t +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> + +#define MARKER_XATTR_PREFIX "trusted.glusterfs" +#define XTIME "xtime" +#define VOLUME_MARK "volume-mark" +#define GF_XATTR_MARKER_KEY MARKER_XATTR_PREFIX "." VOLUME_MARK +#define UUID_SIZE 36 +#define MARKER_UUID_TYPE 1 +#define MARKER_XTIME_TYPE 2 + +typedef int32_t (*xlator_specf_unwind_t)(call_frame_t *frame, int op_ret, + int op_errno, dict_t *dict, + dict_t *xdata); + +struct volume_mark { + uint8_t major; + uint8_t minor; + uint8_t uuid[16]; + uint8_t retval; + uint32_t sec; + uint32_t usec; +} __attribute__((__packed__)); + +/* + * The enumerated type here + * is used to index two kind + * of integer arrays: + * - gauges + * - counters + + * A counter is used internally, + * in getxattr callbacks, to count + * the results, categorized as + * the enum names suggest. So values + * in the counter are always non-negative. + + * Gauges are part of the API. + * The caller passes one to the + * top-level aggregator function, + * cluster_getmarkerattr(). The gauge + * defines an evaluation policy for the + * counter. That is, at the + * end of the aggregation process + * the gauge is matched against the + * counter, and the policy + * represented by the gauge decides + * whether to return with success or failure, + * and in latter case, what particular failure + * case (errno). + + * The rules are the following: for some index i, + * - if gauge[i] == 0, no requirement is set + * against counter[i]; + * - if gauge[i] > 0, counter[i] >= gauge[i] + * is required; + * - if gauge[i] < 0, counter[i] < |gauge[i]| + * is required. + + * If the requirement is not met, then i is mapped + * to the respective errno (MCNT_ENOENT -> ENOENT), + * or in lack of that, EINVAL. + + * Cf. evaluate_marker_results() and marker_idx_errno_map[] + * in libxlator.c + + * We provide two default gauges, one intended for xtime + * aggregation, other for volume mark aggregation. The + * policies they represent agree with the hard-coded + * one prior to gauges. Cf. marker_xtime_default_gauge + * and marker_uuid_default_gauge in libxlator.c + */ + +typedef enum { + MCNT_FOUND, + MCNT_NOTFOUND, + MCNT_ENODATA, + MCNT_ENOTCONN, + MCNT_ENOENT, + MCNT_EOTHER, + MCNT_MAX +} marker_result_idx_t; + +extern int marker_xtime_default_gauge[]; +extern int marker_uuid_default_gauge[]; + +struct marker_str { + struct volume_mark *volmark; + data_t *data; + + uint32_t host_timebuf[2]; + uint32_t net_timebuf[2]; + int32_t call_count; + int gauge[MCNT_MAX]; + int count[MCNT_MAX]; + + xlator_specf_unwind_t xl_specf_unwind; + void *xl_local; + char *vol_uuid; + uint8_t retval; +}; + +typedef struct marker_str xl_marker_local_t; + +int32_t +cluster_markerxtime_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata); + +int32_t +cluster_markeruuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata); + +int +cluster_handle_marker_getxattr(call_frame_t *frame, loc_t *loc, + const char *name, char *vol_uuid, + xlator_specf_unwind_t unwind, + int (*populate_args)(call_frame_t *frame, + int type, int *gauge, + xlator_t **subvols)); +int +match_uuid_local(const char *name, char *uuid); + +int +gf_get_min_stime(xlator_t *this, dict_t *dst, char *key, data_t *value); + +int +gf_get_max_stime(xlator_t *this, dict_t *dst, char *key, data_t *value); + +#endif /* !_LIBXLATOR_H */ |
