summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-self-heal-name.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-name.c')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c616
1 files changed, 616 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
new file mode 100644
index 00000000000..834aac86d48
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -0,0 +1,616 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/events.h>
+#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
+
+int
+__afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies, void *gfid,
+ unsigned char *locked_on, int source,
+ unsigned char *sources, gf_boolean_t is_gfid_absent,
+ int *gfid_idx)
+{
+ int ret = 0;
+ int up_count = 0;
+ int locked_count = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ gf_uuid_copy(parent->gfid, pargfid);
+
+ if (is_gfid_absent) {
+ /* Ensure all children of AFR are up before performing gfid heal, to
+ * guard against the possibility of gfid split brain. */
+
+ up_count = AFR_COUNT(priv->child_up, priv->child_count);
+ if (up_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+
+ locked_count = AFR_COUNT(locked_on, priv->child_count);
+ if (locked_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ ret = afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
+ sources, gfid, gfid_idx);
+
+out:
+ return ret;
+}
+
+int
+__afr_selfheal_name_impunge(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid, const char *bname,
+ inode_t *inode, struct afr_reply *replies,
+ int gfid_idx)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ unsigned char *sources = NULL;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+
+ gf_uuid_copy(parent->gfid, pargfid);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[gfid_idx].poststat.ia_gfid) == 0) {
+ sources[i] = 1;
+ continue;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+
+ ret |= afr_selfheal_recreate_entry(frame, i, gfid_idx, sources, parent,
+ bname, inode, replies);
+ }
+
+ return ret;
+}
+
+int
+__afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret)
+ continue;
+
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
+ }
+
+ return ret;
+}
+
+static gf_boolean_t
+afr_selfheal_name_need_heal_check(xlator_t *this, struct afr_reply *replies)
+{
+ int i = 0;
+ int first_idx = -1;
+ gf_boolean_t need_heal = _gf_false;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA))
+ need_heal = _gf_true;
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret)
+ need_heal = _gf_true;
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid))
+ need_heal = _gf_true;
+
+ if ((replies[i].op_ret == 0) &&
+ (gf_uuid_is_null(replies[i].poststat.ia_gfid)))
+ need_heal = _gf_true;
+ }
+
+ return need_heal;
+}
+
+static int
+afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ uuid_t pargfid, const char *bname)
+{
+ int i = 0;
+ int type_idx = -1;
+ ia_type_t inode_type = IA_INVAL;
+ ia_type_t inode_type1 = IA_INVAL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (inode_type == IA_INVAL) {
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ continue;
+ }
+ inode_type1 = replies[i].poststat.ia_type;
+ if (sources[i] || source == -1) {
+ if ((sources[type_idx] || source == -1) &&
+ (inode_type != inode_type1)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
+ "Type mismatch for <gfid:%s>/%s: "
+ "%s on %s and %s on %s",
+ uuid_utoa(pargfid), bname,
+ gf_inode_type_to_str(inode_type1),
+ priv->children[i]->name,
+ gf_inode_type_to_str(inode_type),
+ priv->children[type_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s;count=2;"
+ "child-%d=%s;type-%d=%s;child-%d=%s;"
+ "type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name,
+ i, gf_inode_type_to_str(inode_type1), type_idx,
+ priv->children[type_idx]->name, type_idx,
+ gf_inode_type_to_str(inode_type));
+ return -EIO;
+ }
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ }
+ }
+ return 0;
+}
+
+static int
+afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ int *gfid_idx, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ unsigned char *locked_on, dict_t *xdata)
+{
+ int i = 0;
+ int gfid_idx_iter = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ void *gfid1 = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (!gfid) {
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ continue;
+ }
+
+ gfid1 = &replies[i].poststat.ia_gfid;
+ if (sources[i] || source == -1) {
+ if ((sources[gfid_idx_iter] || source == -1) &&
+ gf_uuid_compare(gfid, gfid1)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+ bname, gfid_idx_iter, i,
+ locked_on, gfid_idx, xdata);
+ if (!ret && *gfid_idx >= 0) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ "GFID split-brain resolved");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error setting gfid-"
+ "heal-msg dict");
+ }
+ return ret;
+ }
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ }
+ }
+
+ *gfid_idx = gfid_idx_iter;
+ return 0;
+}
+
+static gf_boolean_t
+afr_selfheal_name_source_empty_check(xlator_t *this, struct afr_reply *replies,
+ unsigned char *sources, int source)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
+
+ priv = this->private;
+
+ if (source == -1) {
+ source_is_empty = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+
+ if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT)
+ continue;
+
+ source_is_empty = _gf_false;
+ break;
+ }
+out:
+ return source_is_empty;
+}
+
+int
+__afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, int source,
+ unsigned char *locked_on, struct afr_reply *replies,
+ void *gfid_req, dict_t *xdata)
+{
+ int gfid_idx = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t is_gfid_absent = _gf_false;
+
+ need_heal = afr_selfheal_name_need_heal_check(this, replies);
+ if (!need_heal)
+ return 0;
+
+ source_is_empty = afr_selfheal_name_source_empty_check(this, replies,
+ sources, source);
+ if (source_is_empty) {
+ ret = __afr_selfheal_name_expunge(this, parent, pargfid, bname, inode,
+ replies);
+ if (ret == -EIO)
+ ret = -1;
+ return ret;
+ }
+
+ ret = afr_selfheal_name_type_mismatch_check(this, replies, source, sources,
+ pargfid, bname);
+ if (ret)
+ return ret;
+
+ ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources,
+ &gfid_idx, pargfid, bname,
+ inode, locked_on, xdata);
+ if (ret)
+ return ret;
+
+ if (gfid_idx == -1) {
+ if (!gfid_req || gf_uuid_is_null(gfid_req))
+ return -1;
+ gfid = gfid_req;
+ } else {
+ gfid = &replies[gfid_idx].poststat.ia_gfid;
+ if (source == -1)
+ /* Either entry split-brain or dirty xattrs are present on parent.*/
+ source = gfid_idx;
+ }
+
+ is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
+ ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
+ replies, gfid, locked_on, source, sources,
+ is_gfid_absent, &gfid_idx);
+ if (ret || (gfid_idx < 0))
+ return ret;
+
+ ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
+ inode, replies, gfid_idx);
+ if (ret == -EIO)
+ ret = -1;
+
+ return ret;
+}
+
+int
+__afr_selfheal_name_finalize_source(xlator_t *this, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on, uint64_t *witness)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count || afr_does_witness_exist(this, witness)) {
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return -1;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+
+ return source;
+}
+
+int
+__afr_selfheal_name_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ int *source_p)
+{
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_unlocked_discover(frame, parent, pargfid, replies);
+ if (ret)
+ goto out;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, locked_on, sources,
+ sinks, witness, NULL);
+ if (ret)
+ goto out;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_name_finalize_source(this, sources, healed_sinks,
+ locked_on, witness);
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
+
+out:
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+
+ return ret;
+}
+
+int
+afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, void *gfid_req,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *locked_on = NULL;
+ int source = -1;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ dict_t *xattr = NULL;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ dict_unref(xattr);
+ return -1;
+ }
+
+ priv = this->private;
+
+ locked_on = alloca0(priv->child_count);
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
+ locked_on);
+ {
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_prepare(frame, this, parent, pargfid,
+ locked_on, sources, sinks,
+ healed_sinks, &source);
+ if (ret)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+ locked_on, xattr);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode,
+ sources, sinks, healed_sinks, source,
+ locked_on, replies, gfid_req, xdata);
+ }
+unlock:
+ afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on,
+ NULL);
+ if (inode)
+ inode_unref(inode);
+
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
+}
+
+int
+afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ const char *bname, gf_boolean_t *need_heal)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+ int first_idx = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+ local->child_up, NULL);
+ if (!inode)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) {
+ *need_heal = _gf_true;
+ break;
+ }
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret) {
+ *need_heal = _gf_true;
+ break;
+ }
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid)) {
+ *need_heal = _gf_true;
+ break;
+ }
+ }
+
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ return 0;
+}
+
+int
+afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname,
+ void *gfid_req, dict_t *xdata)
+{
+ inode_t *parent = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ gf_boolean_t need_heal = _gf_false;
+
+ parent = afr_inode_find(this, pargfid);
+ if (!parent)
+ goto out;
+
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ goto out;
+
+ ret = afr_selfheal_name_unlocked_inspect(frame, this, parent, pargfid,
+ bname, &need_heal);
+ if (ret)
+ goto out;
+
+ if (need_heal) {
+ ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname,
+ gfid_req, xdata);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (parent)
+ inode_unref(parent);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+
+ return ret;
+}