From 84c3d7a83a8c84ca11514202a1bc365026fd1c87 Mon Sep 17 00:00:00 2001 From: Pranith K Date: Thu, 14 Jul 2011 06:30:40 +0000 Subject: cluster/afr: Detect conflict/gfid self-heals Added some helper functions that can be reused Signed-off-by: Pranith Kumar K Signed-off-by: Anand Avati BUG: 2745 (failure to detect split brain) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2745 --- xlators/cluster/afr/src/afr-common.c | 621 +++++++++++++++++++------ xlators/cluster/afr/src/afr-dir-read.c | 3 +- xlators/cluster/afr/src/afr-open.c | 8 +- xlators/cluster/afr/src/afr-self-heal-common.c | 7 +- xlators/cluster/afr/src/afr-self-heal-data.c | 2 +- xlators/cluster/afr/src/afr.h | 52 ++- 6 files changed, 532 insertions(+), 161 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 21f7b4e43..894442936 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -60,6 +60,60 @@ #define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL #define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL +int +afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, + gf_boolean_t fail_conflict); +void +afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count) +{ + int i = 0; + + for (i = 0; i < child_count; i++) + dst[i] = src[i]; +} + +void +afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path) +{ + int i = 0; + afr_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64 (xattr_req, priv->pending_key[i], + 3 * sizeof(int32_t)); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "%s: Unable to set dict value for %s", + path, priv->pending_key[i]); + /* 3 = data+metadata+entry */ + } +} + +int +afr_errno_count (int32_t *children, int *child_errno, + unsigned int child_count, int32_t op_errno) +{ + int i = 0; + int errno_count = 0; + int child = 0; + + for (i = 0; i < child_count; i++) { + if (children) { + child = children[i]; + if (child == -1) + break; + } else { + child = i; + } + if (child_errno[child] == op_errno) + errno_count++; + } + return errno_count; +} + int32_t afr_set_dict_gfid (dict_t *dict, uuid_t gfid) { @@ -346,7 +400,7 @@ afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child) } gf_boolean_t -afr_is_success_child (int32_t *success_children, int32_t child_count, +afr_is_child_present (int32_t *success_children, int32_t child_count, int32_t child) { gf_boolean_t success_child = _gf_false; @@ -375,7 +429,7 @@ afr_is_read_child (int32_t *success_children, int32_t *sources, GF_ASSERT (success_children); GF_ASSERT (child_count > 0); - success_child = afr_is_success_child (success_children, child_count, + success_child = afr_is_child_present (success_children, child_count, child); if (!success_child) goto out; @@ -526,6 +580,23 @@ out: return ret; } +void +afr_reset_xattr (dict_t **xattr, unsigned int child_count) +{ + unsigned int i = 0; + + if (!xattr) + goto out; + for (i = 0; i < child_count; i++) { + if (xattr[i]) { + dict_unref (xattr[i]); + xattr[i] = NULL; + } + } +out: + return; +} + void afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) { @@ -540,13 +611,14 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) if (sh->buf) GF_FREE (sh->buf); + if (sh->parentbufs) + GF_FREE (sh->parentbufs); + + if (sh->inode) + inode_unref (sh->inode); + if (sh->xattr) { - for (i = 0; i < priv->child_count; i++) { - if (sh->xattr[i]) { - dict_unref (sh->xattr[i]); - sh->xattr[i] = NULL; - } - } + afr_reset_xattr (sh->xattr, priv->child_count); GF_FREE (sh->xattr); } @@ -590,6 +662,9 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) if (sh->fresh_children) GF_FREE (sh->fresh_children); + if (sh->fresh_parent_dirs) + GF_FREE (sh->fresh_parent_dirs); + loc_wipe (&sh->parent_loc); } @@ -636,7 +711,6 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) void afr_local_cleanup (afr_local_t *local, xlator_t *this) { - int i = 0; afr_private_t * priv = NULL; if (!local) @@ -665,12 +739,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) { /* lookup */ if (local->cont.lookup.xattrs) { - for (i = 0; i < priv->child_count; i++) { - if (local->cont.lookup.xattrs[i]) { - dict_unref (local->cont.lookup.xattrs[i]); - local->cont.lookup.xattrs[i] = NULL; - } - } + afr_reset_xattr (local->cont.lookup.xattrs, + priv->child_count); GF_FREE (local->cont.lookup.xattrs); local->cont.lookup.xattrs = NULL; } @@ -813,26 +883,6 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent) uuid_copy (loc->pargfid, postparent->ia_gfid); } - -int -afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - - local = frame->local; - - if (local->govinda_gOvinda && local->cont.lookup.inode) { - afr_set_split_brain (this, local->cont.lookup.inode, _gf_true); - } - - AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->cont.lookup.inode, &local->cont.lookup.buf, - local->cont.lookup.xattr, - &local->cont.lookup.postparent); - - return 0; -} - void afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) { @@ -842,13 +892,15 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) dict_t **xattr = NULL; GF_ASSERT (local); - GF_ASSERT (local->cont.lookup.read_child >= 0); buf = &local->cont.lookup.buf; postparent = &local->cont.lookup.postparent; xattr = &local->cont.lookup.xattr; - read_child = local->cont.lookup.read_child; + read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode, + NULL); + gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d", + read_child); *xattr = dict_ref (local->cont.lookup.xattrs[read_child]); *buf = local->cont.lookup.bufs[read_child]; *postparent = local->cont.lookup.postparents[read_child]; @@ -859,8 +911,7 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) } } - - static void +static void afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this, int child_index, dict_t *xattr) { @@ -885,8 +936,8 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this, } static void -afr_lookup_detect_self_heal_by_xattr (afr_local_t *local, xlator_t *this, - dict_t *xattr) +afr_lookup_set_self_heal_data_by_xattr (afr_local_t *local, xlator_t *this, + dict_t *xattr) { GF_ASSERT (local); GF_ASSERT (this); @@ -954,6 +1005,8 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this) local->self_heal.need_metadata_self_heal = _gf_true; local->self_heal.need_data_self_heal = _gf_true; local->self_heal.need_entry_self_heal = _gf_true; + local->self_heal.need_gfid_self_heal = _gf_true; + local->self_heal.need_missing_entry_self_heal = _gf_true; gf_log(this->name, GF_LOG_INFO, "entries are missing in lookup of %s.", local->loc.path); @@ -961,14 +1014,15 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this) goto out; } - if (local->success_count > 0) { - if (afr_is_split_brain (this, local->cont.lookup.inode) && - IA_ISREG (local->cont.lookup.inode->ia_type)) { - local->self_heal.need_data_self_heal = _gf_true; - gf_log (this->name, GF_LOG_WARNING, - "split brain detected during lookup of %s.", - local->loc.path); - } + if ((local->success_count > 0) && + afr_is_split_brain (this, local->cont.lookup.inode) && + IA_ISREG (local->cont.lookup.inode->ia_type)) { + local->self_heal.need_data_self_heal = _gf_true; + local->self_heal.need_gfid_self_heal = _gf_true; + local->self_heal.need_missing_entry_self_heal = _gf_true; + gf_log (this->name, GF_LOG_WARNING, + "split brain detected during lookup of %s.", + local->loc.path); } out: @@ -981,31 +1035,39 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv) GF_ASSERT (sh); GF_ASSERT (priv); - return ((priv->data_self_heal && sh->need_data_self_heal) + return (sh->need_gfid_self_heal + || sh->need_missing_entry_self_heal + || (priv->data_self_heal && sh->need_data_self_heal) || (priv->metadata_self_heal && sh->need_metadata_self_heal) || (priv->entry_self_heal && sh->need_entry_self_heal)); } -gf_boolean_t -afr_is_self_heal_enabled (afr_private_t *priv) +afr_transaction_type +afr_transaction_type_get (ia_type_t ia_type) { - GF_ASSERT (priv); + afr_transaction_type type = AFR_METADATA_TRANSACTION; + + GF_ASSERT (ia_type != IA_INVAL); - return (priv->data_self_heal || priv->metadata_self_heal - || priv->entry_self_heal); + if (IA_ISDIR (ia_type)) { + type = AFR_ENTRY_TRANSACTION; + } else if (IA_ISREG (ia_type)) { + type = AFR_DATA_TRANSACTION; + } + return type; } int afr_lookup_select_read_child (afr_local_t *local, xlator_t *this, int32_t *read_child) { + ia_type_t ia_type = IA_INVAL; int32_t source = -1; - ia_type_t ia_type = 0; int ret = -1; - afr_transaction_type type = AFR_METADATA_TRANSACTION; dict_t **xattrs = NULL; int32_t *success_children = NULL; struct iatt *bufs = NULL; + afr_transaction_type type = AFR_METADATA_TRANSACTION; GF_ASSERT (local); GF_ASSERT (this); @@ -1013,18 +1075,23 @@ afr_lookup_select_read_child (afr_local_t *local, xlator_t *this, bufs = local->cont.lookup.bufs; success_children = local->cont.lookup.success_children; + /*We can take the success_children[0] only because we already + *handle the conflicting children other wise, we could select the + *read_child based on wrong file type + */ ia_type = local->cont.lookup.bufs[success_children[0]].ia_type; - if (IA_ISDIR (ia_type)) { - type = AFR_ENTRY_TRANSACTION; - } else if (IA_ISREG (ia_type)) { - type = AFR_DATA_TRANSACTION; - } + type = afr_transaction_type_get (ia_type); xattrs = local->cont.lookup.xattrs; source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs, type); - if (source < 0) + if (source < 0) { + gf_log (this->name, GF_LOG_DEBUG, "failed to select source " + "for %s", local->loc.path); goto out; + } + gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s", + source, local->loc.path); *read_child = source; ret = 0; out: @@ -1041,7 +1108,10 @@ afr_is_self_heal_running (afr_local_t *local) static void afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode, gf_boolean_t is_background, ia_type_t ia_type, - int (*unwind) (call_frame_t *frame, xlator_t *this)) + void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, + xlator_t *this), + int (*unwind) (call_frame_t *frame, xlator_t *this, + int32_t op_ret, int32_t op_errno)) { afr_local_t *local = NULL; char sh_type_str[256] = {0,}; @@ -1054,6 +1124,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode, local->self_heal.background = is_background; local->self_heal.type = ia_type; local->self_heal.unwind = unwind; + local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk; afr_self_heal_type_str_get (&local->self_heal, sh_type_str, @@ -1066,8 +1137,135 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode, afr_self_heal (frame, this, inode); } +int +afr_gfid_missing_count (const char *xlator_name, int32_t *success_children, + struct iatt *bufs, unsigned int child_count, + const char *path) +{ + int gfid_miss_count = 0; + int i = 0; + struct iatt *child1 = NULL; + + for (i = 0; i < child_count; i++) { + if (success_children[i] == -1) + break; + child1 = &bufs[success_children[i]]; + if (uuid_is_null (child1->ia_gfid)) { + gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null" + " on subvolume %d", path, success_children[i]); + gfid_miss_count++; + } + } + + return gfid_miss_count; +} + +static int +afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this) +{ + int32_t *success_children = NULL; + afr_private_t *priv = NULL; + struct iatt *bufs = NULL; + int miss_count = 0; + + priv = this->private; + bufs = local->cont.lookup.bufs; + success_children = local->cont.lookup.success_children; + + miss_count = afr_gfid_missing_count (this->name, success_children, + bufs, priv->child_count, + local->loc.path); + return miss_count; +} + +gf_boolean_t +afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children, + unsigned int child_count, const char *path, + const char *xlator_name) +{ + gf_boolean_t conflicting = _gf_false; + int i = 0; + struct iatt *child1 = NULL; + struct iatt *child2 = NULL; + uuid_t *gfid = NULL; + + for (i = 0; i < child_count; i++) { + if (success_children[i] == -1) + break; + child1 = &bufs[success_children[i]]; + if ((!gfid) && (!uuid_is_null (child1->ia_gfid))) + gfid = &child1->ia_gfid; + + if (i == 0) + continue; + + child2 = &bufs[success_children[i-1]]; + if (FILETYPE_DIFFERS (child1, child2)) { + gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype " + "differs on subvolumes (%d, %d)", path, + success_children[i-1], success_children[i]); + conflicting = _gf_true; + goto out; + } + if (!gfid || uuid_is_null (child1->ia_gfid)) + continue; + if (uuid_compare (*gfid, child1->ia_gfid)) { + gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs" + " on subvolume %d", path, success_children[i]); + conflicting = _gf_true; + goto out; + } + } +out: + return conflicting; +} + +/* afr_update_gfid_from_iatts: This function should be called only if the + * iatts are not conflicting. + */ +void +afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs, + int32_t *success_children, unsigned int child_count) +{ + uuid_t *gfid = NULL; + int i = 0; + int child = 0; + + for (i = 0; i < child_count; i++) { + child = success_children[i]; + if (child == -1) + break; + if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) { + gfid = &bufs[child].ia_gfid; + } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) { + if (uuid_compare (*gfid, bufs[child].ia_gfid)) { + GF_ASSERT (0); + goto out; + } + } + } + if (gfid && (!uuid_is_null (*gfid))) + uuid_copy (uuid, *gfid); +out: + return; +} + +static gf_boolean_t +afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this) +{ + afr_private_t *priv = NULL; + gf_boolean_t conflict = _gf_false; + + priv = this->private; + conflict = afr_conflicting_iattrs (local->cont.lookup.bufs, + local->cont.lookup.success_children, + priv->child_count, local->loc.path, + this->name); + return conflict; +} + static void -afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) +afr_lookup_set_self_heal_data (afr_local_t *local, xlator_t *this) { int i = 0; struct iatt *bufs = NULL; @@ -1076,8 +1274,20 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) int32_t child1 = -1; int32_t child2 = -1; + priv = this->private; afr_detect_self_heal_by_lookup_status (local, this); + if (afr_lookup_gfid_missing_count (local, this)) + local->self_heal.need_gfid_self_heal = _gf_true; + + if (_gf_true == afr_lookup_conflicting_entries (local, this)) + local->self_heal.need_missing_entry_self_heal = _gf_true; + else + afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req, + local->cont.lookup.bufs, + local->cont.lookup.success_children, + priv->child_count); + bufs = local->cont.lookup.bufs; for (i = 1; i < local->success_count; i++) { child1 = local->cont.lookup.success_children[i-1]; @@ -1087,12 +1297,75 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) } xattr = local->cont.lookup.xattrs; - priv = this->private; for (i = 0; i < local->success_count; i++) { child1 = local->cont.lookup.success_children[i]; - afr_lookup_detect_self_heal_by_xattr (local, this, - xattr[child1]); + afr_lookup_set_self_heal_data_by_xattr (local, this, + xattr[child1]); + } +} + +int +afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1) { + local->op_ret = -1; + if (afr_error_more_important (local->op_errno, op_errno)) + local->op_errno = op_errno; + + goto out; + } else { + local->op_ret = 0; + } + + afr_lookup_done_success_action (frame, this, _gf_true); +out: + AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->cont.lookup.inode, &local->cont.lookup.buf, + local->cont.lookup.xattr, + &local->cont.lookup.postparent); + + return 0; +} + +//TODO: At the moment only lookup needs this, so not doing any checks, in the +// future we will have to do fop specific operations +void +afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_local_t *sh_local = NULL; + afr_private_t *priv = NULL; + afr_self_heal_t *sh = NULL; + int i = 0; + struct iatt *lookup_bufs = NULL; + struct iatt *lookup_parentbufs = NULL; + + sh_local = sh_frame->local; + sh = &sh_local->self_heal; + local = sh->orig_frame->local; + lookup_bufs = local->cont.lookup.bufs; + lookup_parentbufs = local->cont.lookup.postparents; + priv = this->private; + + memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf)); + memcpy (lookup_parentbufs, sh->parentbufs, + priv->child_count * sizeof (*sh->parentbufs)); + + afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count); + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) + local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]); } + + afr_reset_children (local->cont.lookup.success_children, + priv->child_count); + afr_children_copy (local->cont.lookup.success_children, + sh->fresh_children, priv->child_count); } static void @@ -1115,20 +1388,14 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this, goto out; } - if (_gf_false == afr_is_self_heal_enabled (priv)) { - gf_log (this->name, GF_LOG_DEBUG, - "Self heal is not enabled"); - goto out; - } - - afr_lookup_detect_self_heal (local, this); + afr_lookup_set_self_heal_data (local, this); if (afr_can_self_heal_proceed (&local->self_heal, priv)) { - if (afr_is_self_heal_running (local)) { + if (afr_is_self_heal_running (local)) goto out; - } afr_launch_self_heal (frame, this, local->cont.lookup.inode, _gf_true, local->cont.lookup.buf.ia_type, + afr_post_gfid_sh_success, afr_self_heal_lookup_unwind); *sh_launched = _gf_true; } @@ -1136,46 +1403,6 @@ out: return; } -static gf_boolean_t -afr_lookup_split_brain (afr_local_t *local, xlator_t *this) -{ - int i = 0; - gf_boolean_t symptom = _gf_false; - struct iatt *bufs = NULL; - int32_t *success_children = NULL; - struct iatt *child1 = NULL; - struct iatt *child2 = NULL; - const char *path = NULL; - - bufs = local->cont.lookup.bufs; - success_children = local->cont.lookup.success_children; - for (i = 1; i < local->success_count; i++) { - child1 = &bufs[success_children[i-1]]; - child2 = &bufs[success_children[i]]; - /* - * TODO: gfid self-heal - * if (uuid_compare (child1->ia_gfid, child2->ia_gfid)) { - * gf_log (this->name, GF_LOG_WARNING, "%s: gfid differs" - * " on subvolumes (%d, %d)", local->loc.path, - * success_children[i-1], success_children[i]); - * symptom = _gf_true; - * } - */ - - if (FILETYPE_DIFFERS (child1, child2)) { - path = local->loc.path; - gf_log (this->name, GF_LOG_WARNING, "%s: filetype " - "differs on subvolumes (%d, %d)", path, - success_children[i-1], success_children[i]); - symptom = _gf_true; - local->govinda_gOvinda = 1; - } - if (symptom) - break; - } - return symptom; -} - void afr_get_fresh_children (int32_t *success_children, int32_t *sources, int32_t *fresh_children, unsigned int child_count) @@ -1187,6 +1414,7 @@ afr_get_fresh_children (int32_t *success_children, int32_t *sources, GF_ASSERT (sources); GF_ASSERT (fresh_children); + afr_reset_children (fresh_children, child_count); for (i = 0; i < child_count; i++) { if (success_children[i] == -1) break; @@ -1206,7 +1434,6 @@ afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child) GF_ASSERT (read_child >= 0); priv = this->private; - local->cont.lookup.read_child = read_child; afr_get_fresh_children (local->cont.lookup.success_children, local->cont.lookup.sources, local->fresh_children, priv->child_count); @@ -1216,6 +1443,60 @@ afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child) return 0; } +int +afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, + gf_boolean_t fail_conflict) +{ + int32_t read_child = -1; + int32_t ret = -1; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + if (local->loc.parent == NULL) + fail_conflict = _gf_true; + + if (afr_conflicting_iattrs (local->cont.lookup.bufs, + local->cont.lookup.success_children, + priv->child_count, local->loc.path, + this->name)) { + if (fail_conflict == _gf_false) { + ret = 0; + } else { + local->op_ret = -1; + local->op_errno = EIO; + } + goto out; + } + + ret = afr_lookup_select_read_child (local, this, &read_child); + if (ret) { + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } + + ret = afr_lookup_set_read_ctx (local, this, read_child); + if (ret) { + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } + + afr_lookup_build_response_params (local, this); + if (afr_is_fresh_lookup (&local->loc, this)) { + afr_update_loc_gfids (&local->loc, + &local->cont.lookup.buf, + &local->cont.lookup.postparent); + } + + ret = 0; +out: + return ret; +} + static void afr_lookup_done (call_frame_t *frame, xlator_t *this) { @@ -1224,44 +1505,44 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) afr_local_t *local = NULL; int ret = -1; gf_boolean_t sh_launched = _gf_false; - int32_t read_child = -1; + int gfid_miss_count = 0; + int enotconn_count = 0; + int up_children_count = 0; priv = this->private; local = frame->local; if (local->op_ret < 0) goto unwind; - - if (_gf_true == afr_lookup_split_brain (local, this)) { - local->op_ret = -1; - local->op_errno = EIO; - goto unwind; - } - - ret = afr_lookup_select_read_child (local, this, &read_child); - if (ret) { + gfid_miss_count = afr_lookup_gfid_missing_count (local, this); + up_children_count = afr_up_children_count (priv->child_count, + local->child_up); + enotconn_count = priv->child_count - up_children_count; + if ((gfid_miss_count == local->success_count) && + (enotconn_count > 0)) { local->op_ret = -1; local->op_errno = EIO; + gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, " + "LOOKUP on a file without gfid is not allowed when " + "some of the children are down", local->loc.path); goto unwind; } - ret = afr_lookup_set_read_ctx (local, this, read_child); + ret = afr_lookup_done_success_action (frame, this, _gf_false); if (ret) goto unwind; - - afr_lookup_build_response_params (local, this); - if (afr_is_fresh_lookup (&local->loc, this)) { - afr_update_loc_gfids (&local->loc, &local->cont.lookup.buf, - &local->cont.lookup.postparent); - } + uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req); afr_lookup_perform_self_heal_if_needed (frame, this, &sh_launched); - if (sh_launched) + if (sh_launched) { unwind = 0; + goto unwind; + } + unwind: if (unwind) { AFR_STACK_UNWIND (lookup, frame, local->op_ret, - local->op_errno, local->cont.lookup.inode, + local->op_errno, local->cont.lookup.inode, &local->cont.lookup.buf, local->cont.lookup.xattr, &local->cont.lookup.postparent); @@ -1277,8 +1558,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) * */ -static gf_boolean_t -__error_more_important (int32_t old_errno, int32_t new_errno) +gf_boolean_t +afr_error_more_important (int32_t old_errno, int32_t new_errno) { gf_boolean_t ret = _gf_true; @@ -1293,6 +1574,28 @@ __error_more_important (int32_t old_errno, int32_t new_errno) return ret; } +int32_t +afr_resultant_errno_get (int32_t *children, + int *child_errno, unsigned int child_count) +{ + int i = 0; + int32_t op_errno = 0; + int child = 0; + + for (i = 0; i < child_count; i++) { + if (children) { + child = children[i]; + if (child == -1) + break; + } else { + child = i; + } + if (afr_error_more_important (op_errno, child_errno[child])) + op_errno = child_errno[child]; + } + return op_errno; +} + static void afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno) { @@ -1300,7 +1603,7 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno) if (op_errno == ENOENT) local->enoent_count++; - if (__error_more_important (local->op_errno, op_errno)) + if (afr_error_more_important (local->op_errno, op_errno)) local->op_errno = op_errno; if (local->op_errno == ESTALE) { @@ -1439,7 +1742,6 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count) if (NULL == local->fresh_children) goto out; - local->cont.lookup.read_child = -1; ret = 0; out: return ret; @@ -1451,6 +1753,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, { afr_private_t *priv = NULL; afr_local_t *local = NULL; + void *gfid_req = NULL; int ret = -1; int i = 0; int call_count = 0; @@ -1521,23 +1824,13 @@ afr_lookup (call_frame_t *frame, xlator_t *this, else local->xattr_req = dict_ref (xattr_req); - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i], - 3 * sizeof(int32_t)); - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "%s: Unable to set dict value for %s", - loc->path, priv->pending_key[i]); - /* 3 = data+metadata+entry */ - } - + afr_xattr_req_prepare (this, local->xattr_req, loc->path); ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "%s: Unable to set dict value for %s", loc->path, GLUSTERFS_INODELK_COUNT); } - ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, @@ -1545,6 +1838,16 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc->path, GLUSTERFS_ENTRYLK_COUNT); } + ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get the gfid from dict"); + } else { + uuid_copy (local->cont.lookup.gfid_req, gfid_req); + } + if (local->loc.parent != NULL) + dict_del (xattr_req, "gfid-req"); + for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND_COOKIE (frame, afr_lookup_cbk, @@ -3252,3 +3555,17 @@ afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, fresh_children[i] = child; } } + +int +afr_get_children_count (int32_t *fresh_children, unsigned int child_count) +{ + int count = 0; + int i = 0; + + for (i = 0; i < child_count; i++) { + if (fresh_children[i] == -1) + break; + count++; + } + return count; +} diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index ce941f018..64b14f88f 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -50,7 +50,8 @@ #include "afr-self-heal-common.h" int -afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this) +afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret, + int32_t op_errno) { afr_local_t *local = NULL; diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 4aa587399..ab636a5b4 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -277,7 +277,8 @@ __unopened_count (int child_count, unsigned int *opened_on, unsigned char *child int -afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this) +afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret, + int32_t op_errno) { afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -418,9 +419,8 @@ afr_openfd_sh (call_frame_t *frame, xlator_t *this) GF_ASSERT (local->loc.path); /* forcibly trigger missing-entries self-heal */ - local->success_count = 1; - local->enoent_count = 1; - + sh->need_missing_entry_self_heal = _gf_true; + sh->need_gfid_self_heal = _gf_true; sh->data_lock_held = _gf_true; sh->need_data_self_heal = _gf_true; sh->type = local->fd->inode->ia_type; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 16345bee7..b28f9114f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1663,7 +1663,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) FRAME_SU_UNDO (bgsh_frame, afr_local_t); if (!sh->unwound) { - sh->unwind (sh->orig_frame, this); + sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno); } if (sh->background) { @@ -1723,7 +1723,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode) sh_local = afr_local_copy (local, this); sh_frame->local = sh_local; sh = &sh_local->self_heal; - sh->inode = inode; + + sh->inode = inode_ref (inode); sh->orig_frame = frame; @@ -1731,6 +1732,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode) sh->buf = GF_CALLOC (priv->child_count, sizeof (struct iatt), gf_afr_mt_iatt); + sh->parentbufs = GF_CALLOC (priv->child_count, sizeof (struct iatt), + gf_afr_mt_iatt); sh->child_errno = GF_CALLOC (priv->child_count, sizeof (int), gf_afr_mt_int); sh->success = GF_CALLOC (priv->child_count, sizeof (int), diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index f9a257972..7179e929d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -699,7 +699,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) orig_local->cont.lookup.xattr = dict_ref (orig_local->cont.lookup.xattrs[sh->source]); if (sh->background) { - sh->unwind (sh->orig_frame, this); + sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno); sh->unwound = _gf_true; } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 994ea40a4..7d5d05479 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -44,6 +44,9 @@ typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this, typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this, int child, int32_t op_error, int32_t op_errno); +typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this); + +typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); typedef struct afr_inode_params_ { uint64_t mask_type; @@ -128,6 +131,8 @@ typedef struct { gf_boolean_t need_data_self_heal; gf_boolean_t need_metadata_self_heal; gf_boolean_t need_entry_self_heal; + gf_boolean_t need_gfid_self_heal; + gf_boolean_t need_missing_entry_self_heal; gf_boolean_t forced_merge; /* Is this a self-heal triggered to forcibly merge the directories? */ @@ -147,17 +152,21 @@ typedef struct { self-heal on */ inode_t *inode; /* inode on which the self-heal is performed on */ + uuid_t sh_gfid_req; /* gfid self-heal needs to be done + with this gfid if it is not null */ /* Function to call to unwind. If self-heal is being done in the background, this function will be called as soon as possible. */ - int (*unwind) (call_frame_t *frame, xlator_t *this); + int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret, + int32_t op_errno); /* End of external interface members */ /* array of stat's, one for each child */ struct iatt *buf; + struct iatt *parentbufs; struct iatt parentbuf; struct iatt entrybuf; @@ -174,12 +183,17 @@ typedef struct { int success_count; /* array containing the fresh children found in the self-heal process */ int32_t *fresh_children; + /* array containing the fresh children found in the parent lookup */ + int32_t *fresh_parent_dirs; /* array of errno's, one for each child */ int *child_errno; int32_t **pending_matrix; int32_t **delta_matrix; + int32_t op_ret; + int32_t op_errno; + int *sources; int source; int active_source; @@ -197,6 +211,7 @@ typedef struct { blksize_t block_size; off_t file_size; off_t offset; + afr_post_remove_call_t post_remove_call; loc_t parent_loc; @@ -211,6 +226,7 @@ typedef struct { int (*completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this); + void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this); call_frame_t *sh_frame; } afr_self_heal_t; @@ -376,6 +392,7 @@ typedef struct _afr_local { } statfs; struct { + uuid_t gfid_req; inode_t *inode; struct iatt buf; struct iatt postparent; @@ -891,4 +908,37 @@ afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, int32_t child_count); void afr_reset_children (int32_t *fresh_children, int32_t child_count); +gf_boolean_t +afr_error_more_important (int32_t old_errno, int32_t new_errno); +int +afr_errno_count (int32_t *children, int *child_errno, + unsigned int child_count, int32_t op_errno); +int +afr_get_children_count (int32_t *fresh_children, unsigned int child_count); +gf_boolean_t +afr_is_child_present (int32_t *success_children, int32_t child_count, + int32_t child); +void +afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs, + int32_t *success_children, + unsigned int child_count); +void +afr_reset_xattr (dict_t **xattr, unsigned int child_count); +gf_boolean_t +afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children, + unsigned int child_count, const char *path, + const char *xlator_name); +int +afr_gfid_missing_count (const char *xlator_name, int32_t *children, + struct iatt *bufs, unsigned int child_count, + const char *path); +void +afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path); +void +afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count); +afr_transaction_type +afr_transaction_type_get (ia_type_t ia_type); +int32_t +afr_resultant_errno_get (int32_t *children, + int *child_errno, unsigned int child_count); #endif /* __AFR_H__ */ -- cgit