diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-name.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 366 |
1 files changed, 283 insertions, 83 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index f1626cc034e..f8b887e9a24 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -19,29 +19,43 @@ int -__afr_selfheal_assign_gfid (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, const char *bname, inode_t *inode, - struct afr_reply *replies, void *gfid) +__afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + struct afr_reply *replies, void *gfid, + unsigned char *locked_on, + gf_boolean_t is_gfid_absent) { - int i = 0; - afr_private_t *priv = NULL; - dict_t *xdata = NULL; - int ret = 0; - loc_t loc = {0, }; + int ret = 0; + int up_count = 0; + int locked_count = 0; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; + loc_t loc = {0, }; + call_frame_t *new_frame = NULL; + afr_local_t *new_local = NULL; priv = this->private; + new_frame = afr_frame_create (this); + if (!new_frame) { + ret = -ENOMEM; + goto out; + } + + new_local = new_frame->local; + uuid_copy (parent->gfid, pargfid); xdata = dict_new (); if (!xdata) { - return -ENOMEM; + ret = -ENOMEM; + goto out; } ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16); if (ret) { - dict_destroy (xdata); - return -ENOMEM; + ret = -ENOMEM; + goto out; } loc.parent = inode_ref (parent); @@ -49,24 +63,53 @@ __afr_selfheal_assign_gfid (call_frame_t *frame, xlator_t *this, inode_t *parent uuid_copy (loc.pargfid, pargfid); loc.name = bname; - for (i = 0; i < priv->child_count; i++) { - if (replies[i].op_ret == 0 || replies[i].op_errno != ENODATA) - continue; + if (is_gfid_absent) { + /* Ensure all children of AFR are up before performing gfid heal, to + * guard against the possibility of gfid split brain. */ + + up_count = AFR_COUNT (priv->child_up, priv->child_count); + if (up_count != priv->child_count) { + ret = -EIO; + goto out; + } + + locked_count = AFR_COUNT (locked_on, priv->child_count); + if (locked_count != priv->child_count) { + ret = -EIO; + goto out; + } + } - ret = syncop_lookup (priv->children[i], &loc, xdata, 0, 0, 0); - } + /* Clear out old replies here and wind lookup on all locked + * subvolumes to achieve two things: + * a. gfid heal on those subvolumes that do not have gfid associated + * with the inode, and + * b. refresh replies, which can be consumed by + * __afr_selfheal_name_impunge(). + */ + + afr_replies_wipe (replies, priv->child_count); + + AFR_ONLIST (locked_on, new_frame, afr_selfheal_discover_cbk, lookup, + &loc, xdata); + afr_replies_copy (replies, new_local->replies, priv->child_count); + +out: loc_wipe (&loc); - dict_unref (xdata); + if (xdata) + dict_unref (xdata); + if (new_frame) + AFR_STACK_DESTROY (new_frame); return ret; } int -__afr_selfheal_name_impunge (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, const char *bname, inode_t *inode, - struct afr_reply *replies, int gfid_idx) +__afr_selfheal_name_impunge (xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + struct afr_reply *replies, int gfid_idx) { int i = 0; afr_private_t *priv = NULL; @@ -84,8 +127,8 @@ __afr_selfheal_name_impunge (call_frame_t *frame, xlator_t *this, inode_t *paren replies[gfid_idx].poststat.ia_gfid) == 0) continue; - ret |= afr_selfheal_recreate_entry (frame, this, i, gfid_idx, - parent, bname, inode, replies); + ret |= afr_selfheal_recreate_entry (this, i, gfid_idx, parent, + bname, inode, replies); } return ret; @@ -93,8 +136,8 @@ __afr_selfheal_name_impunge (call_frame_t *frame, xlator_t *this, inode_t *paren int -__afr_selfheal_name_expunge (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, const char *bname, inode_t *inode, +__afr_selfheal_name_expunge (xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, struct afr_reply *replies) { loc_t loc = {0, }; @@ -143,25 +186,44 @@ __afr_selfheal_name_expunge (call_frame_t *frame, xlator_t *this, inode_t *paren } +/* This function is to be called after ensuring that there is no gfid mismatch + * for the inode across multiple sources + */ +static int +afr_selfheal_gfid_idx_get (xlator_t *this, struct afr_reply *replies, + unsigned char *sources) +{ + int i = 0; + int gfid_idx = -1; + afr_private_t *priv = NULL; + + priv = this->private; -int -__afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, const char *bname, inode_t *inode, - unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, int source, - unsigned char *locked_on, struct afr_reply *replies, - void *gfid_req) + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + + if (!sources[i]) + continue; + + if (uuid_is_null (replies[i].poststat.ia_gfid)) + continue; + + gfid_idx = i; + break; + } + return gfid_idx; +} + +static gf_boolean_t +afr_selfheal_name_need_heal_check (xlator_t *this, struct afr_reply *replies) { - int i = 0; - afr_private_t *priv = NULL; - void* gfid = NULL; - int gfid_idx = -1; - gf_boolean_t source_is_empty = _gf_true; - gf_boolean_t need_heal = _gf_false; - int first_idx = -1; - char g1[64],g2[64]; + int i = 0; + int first_idx = -1; + gf_boolean_t need_heal = _gf_false; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) @@ -182,29 +244,75 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, if (uuid_compare (replies[i].poststat.ia_gfid, replies[first_idx].poststat.ia_gfid)) need_heal = _gf_true; + + if ((replies[i].op_ret == 0) && + (uuid_is_null(replies[i].poststat.ia_gfid))) + need_heal = _gf_true; + } - if (!need_heal) - return 0; + return need_heal; +} - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) +static int +afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies, + int source, unsigned char *sources, + uuid_t pargfid, const char *bname) +{ + int i = 0; + int type_idx = -1; + ia_type_t inode_type = IA_INVAL; + afr_private_t *priv = NULL; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) continue; - if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT) + if (replies[i].poststat.ia_type == IA_INVAL) continue; - source_is_empty = _gf_false; - break; - } + if (inode_type == IA_INVAL) { + inode_type = replies[i].poststat.ia_type; + type_idx = i; + continue; + } + + if (sources[i] || source == -1) { + if ((sources[type_idx] || source == -1) && + (inode_type != replies[i].poststat.ia_type)) { + gf_msg (this->name, GF_LOG_WARNING, 0, + AFR_MSG_SPLIT_BRAIN, + "Type mismatch for <gfid:%s>/%s: " + "%d on %s and %d on %s", + uuid_utoa(pargfid), bname, + replies[i].poststat.ia_type, + priv->children[i]->name, + replies[type_idx].poststat.ia_type, + priv->children[type_idx]->name); + return -EIO; + } + inode_type = replies[i].poststat.ia_type; + type_idx = i; + } + } + return 0; +} - if (source == -1) - source_is_empty = _gf_false; +static int +afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies, + int source, unsigned char *sources, + int *gfid_idx, uuid_t pargfid, + const char *bname) +{ + int i = 0; + int gfid_idx_iter = -1; + void *gfid = NULL; + afr_private_t *priv = NULL; + char g1[64], g2[64]; - if (source_is_empty) { - return __afr_selfheal_name_expunge (frame, this, parent, pargfid, - bname, inode, replies); - } + priv = this->private; for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) @@ -215,13 +323,12 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, if (!gfid) { gfid = &replies[i].poststat.ia_gfid; - gfid_idx = i; + gfid_idx_iter = i; continue; } if (sources[i] || source == -1) { - if (gfid_idx != -1 && - (sources[gfid_idx] || source == -1) && + if ((sources[gfid_idx_iter] || source == -1) && uuid_compare (gfid, replies[i].poststat.ia_gfid)) { gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN, @@ -230,34 +337,109 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, uuid_utoa (pargfid), bname, uuid_utoa_r (replies[i].poststat.ia_gfid, g1), priv->children[i]->name, - uuid_utoa_r (replies[gfid_idx].poststat.ia_gfid, g2), - priv->children[gfid_idx]->name); - return -1; + uuid_utoa_r (replies[gfid_idx_iter].poststat.ia_gfid, g2), + priv->children[gfid_idx_iter]->name); + return -EIO; } gfid = &replies[i].poststat.ia_gfid; - gfid_idx = i; - continue; + gfid_idx_iter = i; } } + *gfid_idx = gfid_idx_iter; + return 0; +} + +static gf_boolean_t +afr_selfheal_name_source_empty_check (xlator_t *this, struct afr_reply *replies, + unsigned char *sources, int source) +{ + int i = 0; + afr_private_t *priv = NULL; + gf_boolean_t source_is_empty = _gf_true; + + priv = this->private; + + if (source == -1) { + source_is_empty = _gf_false; + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + + if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT) + continue; + + source_is_empty = _gf_false; + break; + } +out: + return source_is_empty; +} + +int +__afr_selfheal_name_do (xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, int source, + unsigned char *locked_on, struct afr_reply *replies, + void *gfid_req) +{ + int gfid_idx = -1; + int ret = -1; + void *gfid = NULL; + gf_boolean_t source_is_empty = _gf_true; + gf_boolean_t need_heal = _gf_false; + gf_boolean_t is_gfid_absent = _gf_false; + + need_heal = afr_selfheal_name_need_heal_check (this, replies); + if (!need_heal) + return 0; + + source_is_empty = afr_selfheal_name_source_empty_check (this, replies, + sources, + source); + if (source_is_empty) + return __afr_selfheal_name_expunge (this, parent, pargfid, + bname, inode, replies); + + ret = afr_selfheal_name_type_mismatch_check (this, replies, source, + sources, pargfid, bname); + if (ret) + return ret; + + ret = afr_selfheal_name_gfid_mismatch_check (this, replies, source, + sources, &gfid_idx, + pargfid, bname); + if (ret) + return ret; + if (gfid_idx == -1) { if (!gfid_req || uuid_is_null (gfid_req)) return -1; gfid = gfid_req; + } else { + gfid = &replies[gfid_idx].poststat.ia_gfid; } - __afr_selfheal_assign_gfid (frame, this, parent, pargfid, bname, inode, - replies, gfid); - /*TODO: - * once the gfid is assigned refresh the replies and carry on with - * impunge. i.e. gfid_idx won't be -1. - */ - if (gfid_idx == -1) - return -1; + is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false; + ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode, + replies, gfid, locked_on, + is_gfid_absent); + if (ret) + return ret; + + if (gfid_idx == -1) { + gfid_idx = afr_selfheal_gfid_idx_get (this, replies, sources); + if (gfid_idx == -1) + return -1; + } - return __afr_selfheal_name_impunge (frame, this, parent, pargfid, - bname, inode, replies, gfid_idx); + return __afr_selfheal_name_impunge (this, parent, pargfid, bname, inode, + replies, gfid_idx); } @@ -310,8 +492,7 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren if (ret) goto out; - ret = afr_selfheal_find_direction (frame, this, replies, - AFR_ENTRY_TRANSACTION, + ret = afr_selfheal_find_direction (this, replies, AFR_ENTRY_TRANSACTION, locked_on, sources, sinks); if (ret) goto out; @@ -355,6 +536,17 @@ afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, struct afr_reply *replies = NULL; int ret = -1; inode_t *inode = NULL; + dict_t *xattr = NULL; + + xattr = dict_new (); + if (!xattr) + return -ENOMEM; + + ret = dict_set_int32 (xattr, GF_GFIDLESS_LOOKUP, 1); + if (ret) { + dict_destroy (xattr); + return -1; + } priv = this->private; @@ -379,16 +571,17 @@ afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, if (ret) goto unlock; - inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname, - replies, locked_on); + inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname, + replies, locked_on, + xattr); if (!inode) { ret = -ENOMEM; goto unlock; } - ret = __afr_selfheal_name_do (frame, this, parent, pargfid, bname, - inode, sources, sinks, healed_sinks, - source, locked_on, replies, + ret = __afr_selfheal_name_do (this, parent, pargfid, bname, + inode, sources, sinks, healed_sinks, + source, locked_on, replies, gfid_req); } unlock: @@ -399,6 +592,8 @@ unlock: if (replies) afr_replies_wipe (replies, priv->child_count); + if (xattr) + dict_unref (xattr); return ret; } @@ -420,7 +615,7 @@ afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this, replies = alloca0 (sizeof (*replies) * priv->child_count); inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname, - replies, priv->child_up); + replies, priv->child_up, NULL); if (!inode) return -ENOMEM; @@ -474,9 +669,14 @@ afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname, if (ret) goto out; - if (need_heal) - afr_selfheal_name_do (frame, this, parent, pargfid, bname, - gfid_req); + if (need_heal) { + ret = afr_selfheal_name_do (frame, this, parent, pargfid, bname, + gfid_req); + if (ret) + goto out; + } + + ret = 0; out: if (parent) inode_unref (parent); |