From 94943e213a61c83481308dba6adc6b23c3581d2e Mon Sep 17 00:00:00 2001 From: Pranith K Date: Thu, 14 Jul 2011 06:30:20 +0000 Subject: cluster/afr: Fix conflict files and gfids Change-Id: I771045aca9f9a811744aeec0d844609a37ae4792 BUG: 3734 Reviewed-on: http://review.gluster.com/611 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-common.c | 762 +++++++++++--- xlators/cluster/afr/src/afr-dir-read.c | 5 +- xlators/cluster/afr/src/afr-dir-write.c | 2 + xlators/cluster/afr/src/afr-open.c | 10 +- xlators/cluster/afr/src/afr-self-heal-common.c | 1194 ++++++++++++++-------- xlators/cluster/afr/src/afr-self-heal-common.h | 24 + xlators/cluster/afr/src/afr-self-heal-data.c | 8 +- xlators/cluster/afr/src/afr-self-heal-entry.c | 320 +++--- xlators/cluster/afr/src/afr-self-heal-metadata.c | 61 +- xlators/cluster/afr/src/afr-self-heal.h | 2 +- xlators/cluster/afr/src/afr.h | 77 +- 11 files changed, 1663 insertions(+), 802 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index f49d8c55e2a..69e980a03d5 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -60,6 +60,117 @@ #define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL #define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL +int +afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, + gf_boolean_t fail_conflict); +gf_boolean_t +afr_is_child_present (int32_t *success_children, int32_t child_count, + int32_t child) +{ + gf_boolean_t success_child = _gf_false; + int i = 0; + + GF_ASSERT (child < child_count); + + for (i = 0; i < child_count; i++) { + if (success_children[i] == -1) + break; + if (child == success_children[i]) { + success_child = _gf_true; + break; + } + } + return success_child; +} + +gf_boolean_t +afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child) +{ + gf_boolean_t source_xattrs = _gf_false; + + GF_ASSERT (child < child_count); + + if 
((child >= 0) && (child < child_count) && + sources[child]) { + source_xattrs = _gf_true; + } + return source_xattrs; +} + +gf_boolean_t +afr_is_read_child (int32_t *success_children, int32_t *sources, + int32_t child_count, int32_t child) +{ + gf_boolean_t success_child = _gf_false; + gf_boolean_t source = _gf_false; + + GF_ASSERT (success_children); + GF_ASSERT (child_count > 0); + + success_child = afr_is_child_present (success_children, child_count, + child); + if (!success_child) + goto out; + if (NULL == sources) { + source = _gf_true; + goto out; + } + source = afr_is_source_child (sources, child_count, child); +out: + return (success_child && source); +} + +void +afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count) +{ + int i = 0; + + for (i = 0; i < child_count; i++) + dst[i] = src[i]; +} + +void +afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path) +{ + int i = 0; + afr_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64 (xattr_req, priv->pending_key[i], + 3 * sizeof(int32_t)); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "%s: Unable to set dict value for %s", + path, priv->pending_key[i]); + /* 3 = data+metadata+entry */ + } +} + +int +afr_errno_count (int32_t *children, int *child_errno, + unsigned int child_count, int32_t op_errno) +{ + int i = 0; + int errno_count = 0; + int child = 0; + + for (i = 0; i < child_count; i++) { + if (children) { + child = children[i]; + if (child == -1) + break; + } else { + child = i; + } + if (child_errno[child] == op_errno) + errno_count++; + } + return errno_count; +} + int32_t afr_set_dict_gfid (dict_t *dict, uuid_t gfid) { @@ -267,9 +378,22 @@ out: } -/** - * afr_local_cleanup - cleanup everything in frame->local - */ +void +afr_reset_xattr (dict_t **xattr, unsigned int child_count) +{ + unsigned int i = 0; + + if (!xattr) + goto out; + for (i = 0; i < child_count; i++) { + if 
(xattr[i]) { + dict_unref (xattr[i]); + xattr[i] = NULL; + } + } +out: + return; +} void afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) @@ -285,13 +409,14 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) if (sh->buf) GF_FREE (sh->buf); + if (sh->parentbufs) + GF_FREE (sh->parentbufs); + + if (sh->inode) + inode_unref (sh->inode); + if (sh->xattr) { - for (i = 0; i < priv->child_count; i++) { - if (sh->xattr[i]) { - dict_unref (sh->xattr[i]); - sh->xattr[i] = NULL; - } - } + afr_reset_xattr (sh->xattr, priv->child_count); GF_FREE (sh->xattr); } @@ -331,6 +456,9 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) if (sh->child_success) GF_FREE (sh->child_success); + if (sh->fresh_parent_dirs) + GF_FREE (sh->fresh_parent_dirs); + loc_wipe (&sh->parent_loc); } @@ -374,10 +502,13 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) } +/** + * afr_local_cleanup - cleanup everything in frame->local + */ + void afr_local_cleanup (afr_local_t *local, xlator_t *this) { - int i = 0; afr_private_t * priv = NULL; if (!local) @@ -402,12 +533,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) { /* lookup */ if (local->cont.lookup.xattrs) { - for (i = 0; i < priv->child_count; i++) { - if (local->cont.lookup.xattrs[i]) { - dict_unref (local->cont.lookup.xattrs[i]); - local->cont.lookup.xattrs[i] = NULL; - } - } + afr_reset_xattr (local->cont.lookup.xattrs, + priv->child_count); GF_FREE (local->cont.lookup.xattrs); local->cont.lookup.xattrs = NULL; } @@ -585,25 +712,6 @@ afr_deitransform (ino64_t ino, int child_count) } -int -afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - - local = frame->local; - - if (local->govinda_gOvinda && local->cont.lookup.inode) { - afr_set_split_brain (this, local->cont.lookup.inode, _gf_true); - } - - AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->cont.lookup.inode, &local->cont.lookup.buf, - local->cont.lookup.xattr, 
- &local->cont.lookup.postparent); - - return 0; -} - void afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) { @@ -613,13 +721,14 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) dict_t **xattr = NULL; GF_ASSERT (local); - GF_ASSERT (local->cont.lookup.read_child >= 0); buf = &local->cont.lookup.buf; postparent = &local->cont.lookup.postparent; xattr = &local->cont.lookup.xattr; - read_child = local->cont.lookup.read_child; + read_child = afr_read_child (this, local->cont.lookup.inode); + gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d", + read_child); *xattr = dict_ref (local->cont.lookup.xattrs[read_child]); *buf = local->cont.lookup.bufs[read_child]; *postparent = local->cont.lookup.postparents[read_child]; @@ -630,8 +739,7 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) } } - - static void +static void afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this, int child_index, dict_t *xattr) { @@ -742,6 +850,8 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this) local->self_heal.need_metadata_self_heal = _gf_true; local->self_heal.need_data_self_heal = _gf_true; local->self_heal.need_entry_self_heal = _gf_true; + local->self_heal.need_gfid_self_heal = _gf_true; + local->self_heal.need_missing_entry_self_heal = _gf_true; gf_log(this->name, GF_LOG_INFO, "entries are missing in lookup of %s.", local->loc.path); @@ -749,14 +859,15 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this) goto out; } - if (local->success_count > 0) { - if (afr_is_split_brain (this, local->cont.lookup.inode) && - IA_ISREG (local->cont.lookup.inode->ia_type)) { - local->self_heal.need_data_self_heal = _gf_true; - gf_log (this->name, GF_LOG_WARNING, - "split brain detected during lookup of %s.", - local->loc.path); - } + if ((local->success_count > 0) && + afr_is_split_brain (this, local->cont.lookup.inode) && + IA_ISREG 
(local->cont.lookup.inode->ia_type)) { + local->self_heal.need_data_self_heal = _gf_true; + local->self_heal.need_gfid_self_heal = _gf_true; + local->self_heal.need_missing_entry_self_heal = _gf_true; + gf_log (this->name, GF_LOG_WARNING, + "split brain detected during lookup of %s.", + local->loc.path); } out: @@ -769,49 +880,62 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv) GF_ASSERT (sh); GF_ASSERT (priv); - return ((priv->data_self_heal && sh->need_data_self_heal) + return (sh->need_gfid_self_heal + || sh->need_missing_entry_self_heal + || (priv->data_self_heal && sh->need_data_self_heal) || (priv->metadata_self_heal && sh->need_metadata_self_heal) || (priv->entry_self_heal && sh->need_entry_self_heal)); } -gf_boolean_t -afr_is_self_heal_enabled (afr_private_t *priv) +afr_transaction_type +afr_transaction_type_get (ia_type_t ia_type) { - GF_ASSERT (priv); + afr_transaction_type type = AFR_METADATA_TRANSACTION; - return (priv->data_self_heal || priv->metadata_self_heal - || priv->entry_self_heal); + GF_ASSERT (ia_type != IA_INVAL); + + if (IA_ISDIR (ia_type)) { + type = AFR_ENTRY_TRANSACTION; + } else if (IA_ISREG (ia_type)) { + type = AFR_DATA_TRANSACTION; + } + return type; } int afr_lookup_select_read_child (afr_local_t *local, xlator_t *this, int32_t *read_child) { - int32_t source = -1; - ia_type_t ia_type = 0; - int ret = -1; - afr_transaction_type type = AFR_METADATA_TRANSACTION; - dict_t **xattrs = NULL; - int32_t *child_success = NULL; - struct iatt *bufs = NULL; + ia_type_t ia_type = IA_INVAL; + int32_t source = -1; + int ret = -1; + dict_t **xattrs = NULL; + int32_t *success_children = NULL; + struct iatt *bufs = NULL; + afr_transaction_type type = AFR_METADATA_TRANSACTION; GF_ASSERT (local); GF_ASSERT (this); bufs = local->cont.lookup.bufs; - child_success = local->cont.lookup.child_success; - ia_type = local->cont.lookup.bufs[child_success[0]].ia_type; - if (IA_ISDIR (ia_type)) { - type = AFR_ENTRY_TRANSACTION; - } else if 
(IA_ISREG (ia_type)) { - type = AFR_DATA_TRANSACTION; - } + success_children = local->cont.lookup.child_success; + /*We can take the success_children[0] only because we already + *handle the conflicting children other wise, we could select the + *read_child based on wrong file type + */ + ia_type = local->cont.lookup.bufs[success_children[0]].ia_type; + type = afr_transaction_type_get (ia_type); xattrs = local->cont.lookup.xattrs; source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs, type); - if (source < 0) + if (source < 0) { + gf_log (this->name, GF_LOG_DEBUG, "failed to select source " + "for %s", local->loc.path); goto out; + } + gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s", + source, local->loc.path); *read_child = source; ret = 0; out: @@ -828,7 +952,10 @@ afr_is_self_heal_running (afr_local_t *local) static void afr_launch_self_heal (call_frame_t *frame, xlator_t *this, gf_boolean_t is_background, ia_type_t ia_type, - int (*unwind) (call_frame_t *frame, xlator_t *this)) + inode_t *inode, + void (*gfid_sh_success_cbk) (call_frame_t*, xlator_t*), + int (*unwind) (call_frame_t *frame, xlator_t *this, + int32_t op_ret, int32_t op_errno)) { afr_local_t *local = NULL; char sh_type_str[256] = {0,}; @@ -840,6 +967,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, local->self_heal.background = is_background; local->self_heal.type = ia_type; local->self_heal.unwind = unwind; + local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk; afr_self_heal_type_str_get (&local->self_heal, sh_type_str, @@ -849,11 +977,142 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, "background %s self-heal triggered. 
path: %s", sh_type_str, local->loc.path); - afr_self_heal (frame, this); + afr_self_heal (frame, this, inode); +} + +int +afr_gfid_missing_count (const char *xlator_name, int32_t *success_children, + struct iatt *bufs, unsigned int child_count, + const char *path) +{ + int gfid_miss_count = 0; + int i = 0; + struct iatt *child1 = NULL; + + for (i = 0; i < child_count; i++) { + if (success_children[i] == -1) + break; + child1 = &bufs[success_children[i]]; + if (uuid_is_null (child1->ia_gfid)) { + gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null" + " on subvolume %d", path, success_children[i]); + gfid_miss_count++; + } + } + + return gfid_miss_count; +} + +static int +afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this) +{ + int32_t *success_children = NULL; + afr_private_t *priv = NULL; + struct iatt *bufs = NULL; + int miss_count = 0; + + priv = this->private; + bufs = local->cont.lookup.bufs; + success_children = local->cont.lookup.child_success; + + miss_count = afr_gfid_missing_count (this->name, success_children, + bufs, priv->child_count, + local->loc.path); + return miss_count; +} + +gf_boolean_t +afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children, + unsigned int child_count, const char *path, + const char *xlator_name) +{ + gf_boolean_t conflicting = _gf_false; + int i = 0; + struct iatt *child1 = NULL; + struct iatt *child2 = NULL; + uuid_t *gfid = NULL; + char gfid_str[64] = {0}; + + for (i = 0; i < child_count; i++) { + if (success_children[i] == -1) + break; + child1 = &bufs[success_children[i]]; + if ((!gfid) && (!uuid_is_null (child1->ia_gfid))) + gfid = &child1->ia_gfid; + + if (i == 0) + continue; + + child2 = &bufs[success_children[i-1]]; + if (FILETYPE_DIFFERS (child1, child2)) { + gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype " + "differs on subvolumes (%d, %d)", path, + success_children[i-1], success_children[i]); + conflicting = _gf_true; + goto out; + } + if (!gfid || uuid_is_null (child1->ia_gfid)) + 
continue; + if (uuid_compare (*gfid, child1->ia_gfid)) { + uuid_utoa_r (*gfid, gfid_str); + gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs" + " on subvolume %d (%s, %s)", path, + success_children[i], gfid_str, + uuid_utoa (child1->ia_gfid)); + conflicting = _gf_true; + goto out; + } + } +out: + return conflicting; +} + +/* afr_update_gfid_from_iatts: This function should be called only if the + * iatts are not conflicting. + */ +void +afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs, + int32_t *success_children, unsigned int child_count) +{ + uuid_t *gfid = NULL; + int i = 0; + int child = 0; + + for (i = 0; i < child_count; i++) { + child = success_children[i]; + if (child == -1) + break; + if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) { + gfid = &bufs[child].ia_gfid; + } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) { + if (uuid_compare (*gfid, bufs[child].ia_gfid)) { + GF_ASSERT (0); + goto out; + } + } + } + if (gfid && (!uuid_is_null (*gfid))) + uuid_copy (uuid, *gfid); +out: + return; +} + +static gf_boolean_t +afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this) +{ + afr_private_t *priv = NULL; + gf_boolean_t conflict = _gf_false; + + priv = this->private; + conflict = afr_conflicting_iattrs (local->cont.lookup.bufs, + local->cont.lookup.child_success, + priv->child_count, local->loc.path, + this->name); + return conflict; } static void -afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) +afr_lookup_set_self_heal_data (afr_local_t *local, xlator_t *this) { int i = 0; struct iatt *bufs = NULL; @@ -862,8 +1121,20 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) int32_t child1 = -1; int32_t child2 = -1; + priv = this->private; afr_detect_self_heal_by_lookup_status (local, this); + if (afr_lookup_gfid_missing_count (local, this)) + local->self_heal.need_gfid_self_heal = _gf_true; + + if (_gf_true == afr_lookup_conflicting_entries (local, this)) + 
local->self_heal.need_missing_entry_self_heal = _gf_true; + else + afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req, + local->cont.lookup.bufs, + local->cont.lookup.child_success, + priv->child_count); + bufs = local->cont.lookup.bufs; for (i = 1; i < local->success_count; i++) { child1 = local->cont.lookup.child_success[i-1]; @@ -873,7 +1144,6 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) } xattr = local->cont.lookup.xattrs; - priv = this->private; for (i = 0; i < local->success_count; i++) { child1 = local->cont.lookup.child_success[i];; afr_lookup_detect_self_heal_by_xattr (local, this, @@ -881,6 +1151,74 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) } } +int +afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1) { + local->op_ret = -1; + if (afr_error_more_important (local->op_errno, op_errno)) + local->op_errno = op_errno; + + goto out; + } else { + local->op_ret = 0; + } + + afr_lookup_done_success_action (frame, this, _gf_true); +out: + AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->cont.lookup.inode, &local->cont.lookup.buf, + local->cont.lookup.xattr, + &local->cont.lookup.postparent); + + return 0; +} + +//TODO: At the moment only lookup needs this, so not doing any checks, in the +// future we will have to do fop specific operations +void +afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_local_t *sh_local = NULL; + afr_private_t *priv = NULL; + afr_self_heal_t *sh = NULL; + int i = 0; + struct iatt *lookup_bufs = NULL; + struct iatt *lookup_parentbufs = NULL; + + sh_local = sh_frame->local; + sh = &sh_local->self_heal; + local = sh->orig_frame->local; + lookup_bufs = local->cont.lookup.bufs; + lookup_parentbufs = local->cont.lookup.postparents; + priv = this->private; + + memcpy (lookup_bufs, 
sh->buf, priv->child_count * sizeof (*sh->buf)); + memcpy (lookup_parentbufs, sh->parentbufs, + priv->child_count * sizeof (*sh->parentbufs)); + + afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count); + if (local->cont.lookup.xattr) { + dict_unref (local->cont.lookup.xattr); + local->cont.lookup.xattr = NULL; + } + + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) + local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]); + } + afr_reset_children (local->cont.lookup.child_success, + priv->child_count); + afr_children_copy (local->cont.lookup.child_success, + sh->fresh_children, priv->child_count); +} + static void afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this, gf_boolean_t *sh_launched) @@ -901,20 +1239,15 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this, goto out; } - if (_gf_false == afr_is_self_heal_enabled (priv)) { - gf_log (this->name, GF_LOG_DEBUG, - "Self heal is not enabled"); - goto out; - } - - afr_lookup_detect_self_heal (local, this); + afr_lookup_set_self_heal_data (local, this); if (afr_can_self_heal_proceed (&local->self_heal, priv)) { - if (afr_is_self_heal_running (local)) { + if (afr_is_self_heal_running (local)) goto out; - } afr_launch_self_heal (frame, this, _gf_true, local->cont.lookup.buf.ia_type, + local->cont.lookup.inode, + afr_post_gfid_sh_success, afr_self_heal_lookup_unwind); *sh_launched = _gf_true; } @@ -922,46 +1255,6 @@ out: return; } -static gf_boolean_t -afr_lookup_split_brain (afr_local_t *local, xlator_t *this) -{ - int i = 0; - gf_boolean_t symptom = _gf_false; - struct iatt *bufs = NULL; - int32_t *child_success = NULL; - struct iatt *child1 = NULL; - struct iatt *child2 = NULL; - const char *path = NULL; - - bufs = local->cont.lookup.bufs; - child_success = local->cont.lookup.child_success; - for (i = 1; i < local->success_count; i++) { - child1 = &bufs[child_success[i-1]]; - child2 = &bufs[child_success[i]]; - /* - * TODO: gfid self-heal - * 
if (uuid_compare (child1->ia_gfid, child2->ia_gfid)) { - * gf_log (this->name, GF_LOG_WARNING, "%s: gfid differs" - * " on subvolumes (%d, %d)", local->loc.path, - * child_success[i-1], child_success[i]); - * symptom = _gf_true; - * } - */ - - if (FILETYPE_DIFFERS (child1, child2)) { - path = local->loc.path; - gf_log (this->name, GF_LOG_WARNING, "%s: filetype " - "differs on subvolumes (%d, %d)", path, - child_success[i-1], child_success[i]); - symptom = _gf_true; - local->govinda_gOvinda = 1; - } - if (symptom) - break; - } - return symptom; -} - static int afr_lookup_set_read_child (afr_local_t *local, xlator_t *this, int32_t read_child) { @@ -973,6 +1266,60 @@ afr_lookup_set_read_child (afr_local_t *local, xlator_t *this, int32_t read_chil return 0; } +int +afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, + gf_boolean_t fail_conflict) +{ + int32_t read_child = -1; + int32_t ret = -1; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + if (local->loc.parent == NULL) + fail_conflict = _gf_true; + + if (afr_conflicting_iattrs (local->cont.lookup.bufs, + local->cont.lookup.child_success, + priv->child_count, local->loc.path, + this->name)) { + if (fail_conflict == _gf_false) { + ret = 0; + } else { + local->op_ret = -1; + local->op_errno = EIO; + } + goto out; + } + + ret = afr_lookup_select_read_child (local, this, &read_child); + if (ret) { + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } + + ret = afr_lookup_set_read_child (local, this, read_child); + if (ret) { + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } + + afr_lookup_build_response_params (local, this); + if (afr_is_fresh_lookup (&local->loc, this)) { + afr_update_loc_gfids (&local->loc, + &local->cont.lookup.buf, + &local->cont.lookup.postparent); + } + + ret = 0; +out: + return ret; +} + static void afr_lookup_done (call_frame_t *frame, xlator_t *this) { @@ -981,44 +1328,44 @@ afr_lookup_done 
(call_frame_t *frame, xlator_t *this) afr_local_t *local = NULL; int ret = -1; gf_boolean_t sh_launched = _gf_false; - int32_t read_child = -1; + int gfid_miss_count = 0; + int enotconn_count = 0; + int up_children_count = 0; priv = this->private; local = frame->local; if (local->op_ret < 0) goto unwind; - - if (_gf_true == afr_lookup_split_brain (local, this)) { - local->op_ret = -1; - local->op_errno = EIO; - goto unwind; - } - - ret = afr_lookup_select_read_child (local, this, &read_child); - if (ret) { + gfid_miss_count = afr_lookup_gfid_missing_count (local, this); + up_children_count = afr_up_children_count (priv->child_count, + local->child_up); + enotconn_count = priv->child_count - up_children_count; + if ((gfid_miss_count == local->success_count) && + (enotconn_count > 0)) { local->op_ret = -1; local->op_errno = EIO; + gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, " + "LOOKUP on a file without gfid is not allowed when " + "some of the children are down", local->loc.path); goto unwind; } - ret = afr_lookup_set_read_child (local, this, read_child); + ret = afr_lookup_done_success_action (frame, this, _gf_false); if (ret) goto unwind; - - afr_lookup_build_response_params (local, this); - if (afr_is_fresh_lookup (&local->loc, this)) { - afr_update_loc_gfids (&local->loc, &local->cont.lookup.buf, - &local->cont.lookup.postparent); - } + uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req); afr_lookup_perform_self_heal_if_needed (frame, this, &sh_launched); - if (sh_launched) + if (sh_launched) { unwind = 0; + goto unwind; + } + unwind: if (unwind) { AFR_STACK_UNWIND (lookup, frame, local->op_ret, - local->op_errno, local->cont.lookup.inode, + local->op_errno, local->cont.lookup.inode, &local->cont.lookup.buf, local->cont.lookup.xattr, &local->cont.lookup.postparent); @@ -1034,8 +1381,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) * */ -static gf_boolean_t -__error_more_important (int32_t old_errno, int32_t new_errno) 
+gf_boolean_t +afr_error_more_important (int32_t old_errno, int32_t new_errno) { gf_boolean_t ret = _gf_true; @@ -1050,6 +1397,28 @@ __error_more_important (int32_t old_errno, int32_t new_errno) return ret; } +int32_t +afr_resultant_errno_get (int32_t *children, + int *child_errno, unsigned int child_count) +{ + int i = 0; + int32_t op_errno = 0; + int child = 0; + + for (i = 0; i < child_count; i++) { + if (children) { + child = children[i]; + if (child == -1) + break; + } else { + child = i; + } + if (afr_error_more_important (op_errno, child_errno[child])) + op_errno = child_errno[child]; + } + return op_errno; +} + static void afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno) { @@ -1057,7 +1426,7 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno) if (op_errno == ENOENT) local->enoent_count++; - if (__error_more_important (local->op_errno, op_errno)) + if (afr_error_more_important (local->op_errno, op_errno)) local->op_errno = op_errno; if (local->op_errno == ESTALE) { local->op_ret = -1; @@ -1196,7 +1565,6 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count) local->cont.lookup.child_success = child_success; - local->cont.lookup.read_child = -1; ret = 0; out: return ret; @@ -1208,6 +1576,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, { afr_private_t *priv = NULL; afr_local_t *local = NULL; + void *gfid_req = NULL; int ret = -1; int i = 0; int call_count = 0; @@ -1277,23 +1646,13 @@ afr_lookup (call_frame_t *frame, xlator_t *this, else local->xattr_req = dict_ref (xattr_req); - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i], - 3 * sizeof(int32_t)); - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "%s: Unable to set dict value for %s", - loc->path, priv->pending_key[i]); - /* 3 = data+metadata+entry */ - } - + afr_xattr_req_prepare (this, local->xattr_req, loc->path); ret = dict_set_uint64 (local->xattr_req, 
GLUSTERFS_INODELK_COUNT, 0); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "%s: Unable to set dict value for %s", loc->path, GLUSTERFS_INODELK_COUNT); } - ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, @@ -1301,6 +1660,16 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc->path, GLUSTERFS_ENTRYLK_COUNT); } + ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get the gfid from dict"); + } else { + uuid_copy (local->cont.lookup.gfid_req, gfid_req); + } + if (local->loc.parent != NULL) + dict_del (xattr_req, "gfid-req"); + for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND_COOKIE (frame, afr_lookup_cbk, @@ -2810,3 +3179,86 @@ afr_notify (xlator_t *this, int32_t event, out: return ret; } +void +afr_reset_children (int32_t *fresh_children, int32_t child_count) +{ + unsigned int i = 0; + for (i = 0; i < child_count; i++) + fresh_children[i] = -1; +} + +int32_t* +afr_fresh_children_create (int32_t child_count) +{ + int32_t *fresh_children = NULL; + int i = 0; + + GF_ASSERT (child_count > 0); + + fresh_children = GF_CALLOC (child_count, sizeof (*fresh_children), + gf_afr_mt_int32_t); + if (NULL == fresh_children) + goto out; + for (i = 0; i < child_count; i++) + fresh_children[i] = -1; +out: + return fresh_children; +} + +void +afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, + int32_t child_count) +{ + gf_boolean_t child_found = _gf_false; + int i = 0; + + for (i = 0; i < child_count; i++) { + if (fresh_children[i] == -1) + break; + if (fresh_children[i] == child) { + child_found = _gf_true; + break; + } + } + if (!child_found) { + GF_ASSERT (i < child_count); + fresh_children[i] = child; + } +} + +int +afr_get_children_count (int32_t *fresh_children, unsigned int child_count) +{ + int count = 0; + int i = 0; + + for (i = 0; i < child_count; i++) { + if (fresh_children[i] == 
-1) + break; + count++; + } + return count; +} + +void +afr_get_fresh_children (int32_t *success_children, int32_t *sources, + int32_t *fresh_children, unsigned int child_count) +{ + unsigned int i = 0; + unsigned int j = 0; + + GF_ASSERT (success_children); + GF_ASSERT (sources); + GF_ASSERT (fresh_children); + + afr_reset_children (fresh_children, child_count); + for (i = 0; i < child_count; i++) { + if (success_children[i] == -1) + break; + if (afr_is_read_child (success_children, sources, child_count, + success_children[i])) { + fresh_children[j] = success_children[i]; + j++; + } + } +} diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 3f056b68628..9e799b85813 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -51,7 +51,8 @@ int -afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this) +afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret, + int32_t op_errno) { afr_local_t *local = NULL; @@ -177,7 +178,7 @@ out: " forced merge option set", sh_type_str, local->loc.path); - afr_self_heal (frame, this); + afr_self_heal (frame, this, local->fd->inode); } else { afr_set_opendir_done (this, local->fd->inode); diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 3212c1e9563..c6f68a072c3 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -54,6 +54,8 @@ afr_build_parent_loc (loc_t *parent, loc_t *child) char *tmp = NULL; if (!child->parent) { + //this should never be called with root as the child + GF_ASSERT (0); loc_copy (parent, child); return; } diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index c68c025dd79..306f5a85af0 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -277,7 +277,8 @@ __unopened_count (int child_count, unsigned int *opened_on, unsigned char 
*child int -afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this) +afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret, + int32_t op_errno) { afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -418,9 +419,8 @@ afr_openfd_sh (call_frame_t *frame, xlator_t *this) GF_ASSERT (local->loc.path); /* forcibly trigger missing-entries self-heal */ - local->success_count = 1; - local->enoent_count = 1; - + sh->need_missing_entry_self_heal = _gf_true; + sh->need_gfid_self_heal = _gf_true; sh->data_lock_held = _gf_true; sh->need_data_self_heal = _gf_true; sh->type = local->fd->inode->ia_type; @@ -434,7 +434,7 @@ afr_openfd_sh (call_frame_t *frame, xlator_t *this) "path: %s, reason: Replicate up down flush, data lock is held", sh_type_str, local->loc.path); - afr_self_heal (frame, this); + afr_self_heal (frame, this, local->fd->inode); return 0; } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b29deb8bca2..dc660e19888 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -27,6 +27,21 @@ #include "afr-self-heal.h" #include "pump.h" +//Intersection[child]=1 if child is part of intersection +void +afr_children_intersection_get (int32_t *set1, int32_t *set2, + int *intersection, unsigned int child_count) +{ + int i = 0; + + memset (intersection, 0, sizeof (*intersection) * child_count); + for (i = 0; i < child_count; i++) { + intersection[i] = afr_is_child_present (set1, child_count, i) + && afr_is_child_present (set2, child_count, + i); + } +} + /** * select_source - select a source and return it */ @@ -70,6 +85,14 @@ afr_sh_source_count (int sources[], int child_count) return nsource; } +void +afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno) +{ + sh->op_ret = -1; + if (afr_error_more_important (sh->op_errno, op_errno)) + sh->op_errno = op_errno; +} + void afr_sh_print_pending_matrix (int32_t *pending_matrix[], 
xlator_t *this) { @@ -186,31 +209,6 @@ out: return ret; } - -/** - * mark_sources: Mark all 'source' nodes and return number of source - * nodes found - * - * A node (a row in the pending matrix) belongs to one of - * three categories: - * - * M is the pending matrix. - * - * 'innocent' - M[i] is all zeroes - * 'fool' - M[i] has i'th element = 1 (self-reference) - * 'wise' - M[i] has i'th element = 0, others are 1 or 0. - * - * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is - * needed. - * - * A 'wise' node can be a source. If two 'wise' nodes conflict, it is - * a split-brain. If one wise node refers to the other but the other doesn't - * refer back, the referrer is a source. - * - * All fools are sinks, unless there are no 'wise' nodes. In that case, - * one of the fools is made a source. - */ - typedef enum { AFR_NODE_INNOCENT, AFR_NODE_FOOL, @@ -584,6 +582,60 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child, return type; } +int +afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs, + int32_t **pending_matrix, int32_t *sources, + int32_t *success_children, afr_transaction_type type) +{ + afr_private_t *priv = NULL; + afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID; + int nsources = -1; + + priv = this->private; + + if (afr_get_children_count (success_children, priv->child_count) == 0) + goto out; + + afr_build_pending_matrix (priv->pending_key, pending_matrix, + xattr, type, priv->child_count); + + sh_type = afr_self_heal_type_for_transaction (type); + if (AFR_SELF_HEAL_INVALID == sh_type) + goto out; + + afr_sh_print_pending_matrix (pending_matrix, this); + + nsources = afr_mark_sources (sources, pending_matrix, bufs, + priv->child_count, sh_type, + success_children, this->name); +out: + return nsources; +} + +/** + * mark_sources: Mark all 'source' nodes and return number of source + * nodes found + * + * A node (a row in the pending matrix) belongs to one of + * three categories: + * + * M is 
the pending matrix. + * + * 'innocent' - M[i] is all zeroes + * 'fool' - M[i] has i'th element = 1 (self-reference) + * 'wise' - M[i] has i'th element = 0, others are 1 or 0. + * + * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is + * needed. + * + * A 'wise' node can be a source. If two 'wise' nodes conflict, it is + * a split-brain. If one wise node refers to the other but the other doesn't + * refer back, the referrer is a source. + * + * All fools are sinks, unless there are no 'wise' nodes. In that case, + * one of the fools is made a source. + */ + int afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs, int32_t child_count, afr_self_heal_type type, @@ -886,7 +938,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this) sh->xattr[i] = NULL; } - if (local->govinda_gOvinda) { + if (local->govinda_gOvinda || sh->op_failed) { gf_log (this->name, GF_LOG_INFO, "split brain found, aborting selfheal of %s", local->loc.path); @@ -904,7 +956,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this) static int -sh_missing_entries_finish (call_frame_t *frame, xlator_t *this) +afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; @@ -918,524 +970,751 @@ sh_missing_entries_finish (call_frame_t *frame, xlator_t *this) return 0; } - -static int -sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int op_errno, - struct iatt *preop, struct iatt *postop) +static void +afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, + dict_t *xattr, struct iatt *postparent) { - afr_local_t *local = NULL; - loc_t *parent_loc = cookie; - int call_count = 0; + int child_index = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_self_heal_t *sh = NULL; local = frame->local; + 
priv = this->private; + sh = &local->self_heal; + child_index = (long) cookie; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_INFO, - "setattr on %s failed: %s", - local->loc.path, strerror (op_errno)); + LOCK (&frame->lock); + { + if (op_ret == 0) { + sh->buf[child_index] = *buf; + sh->parentbuf = *postparent; + sh->parentbufs[child_index] = *postparent; + sh->child_success[sh->success_count] = child_index; + sh->success_count++; + sh->xattr[child_index] = dict_ref (xattr); + } else { + gf_log (this->name, GF_LOG_ERROR, + "path %s on subvolume %s => -1 (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + local->self_heal.child_errno[child_index] = op_errno; + } } + UNLOCK (&frame->lock); + return; +} - if (parent_loc) { - loc_wipe (parent_loc); - GF_FREE (parent_loc); +gf_boolean_t +afr_valid_ia_type (ia_type_t ia_type) +{ + switch (ia_type) { + case IA_IFSOCK: + case IA_IFREG: + case IA_IFBLK: + case IA_IFCHR: + case IA_IFIFO: + case IA_IFLNK: + case IA_IFDIR: + return _gf_true; + default: + return _gf_false; } + return _gf_false; +} - call_count = afr_frame_return (frame); +void +afr_sh_call_entry_impunge_recreate (call_frame_t *frame, xlator_t *this, + int child_index, struct iatt *buf, + struct iatt *postparent, + afr_impunge_done_cbk_t impunge_done) +{ + call_frame_t *impunge_frame = NULL; + afr_local_t *local = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *sh = NULL; + afr_self_heal_t *impunge_sh = NULL; + int32_t op_errno = 0; - if (call_count == 0) { - STACK_DESTROY (frame->root); + impunge_frame = copy_frame (frame); + if (!impunge_frame) { + op_errno = ENOMEM; + goto out; } - return 0; -} + ALLOC_OR_GOTO (impunge_local, afr_local_t, out); + local = frame->local; + sh = &local->self_heal; + impunge_frame->local = impunge_local; + impunge_sh = &impunge_local->self_heal; + impunge_sh->sh_frame = frame; + impunge_sh->active_source = sh->source; + impunge_sh->impunging_entry_mode = st_mode_from_ia 
(buf->ia_prot, + buf->ia_type); + impunge_sh->impunge_ret_child = child_index; + loc_copy (&impunge_local->loc, &local->loc); + sh->impunge_done = impunge_done; + impunge_local->call_count = 1; + afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf, + postparent); + return; +out: + gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, reason: %s", + local->loc.path, strerror (op_errno)); + impunge_done (frame, this, child_index, -1, op_errno); +} -static int -sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - call_frame_t *setattr_frame = NULL; - int call_count = 0; - int child_index = 0; - loc_t *parent_loc = NULL; - struct iatt stbuf = {0,}; - int32_t valid = 0; +int +afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, int child, + int32_t op_ret, int32_t op_errno) +{ + int call_count = 0; + afr_local_t *local = NULL; local = frame->local; - sh = &local->self_heal; - priv = this->private; - child_index = (long) cookie; - - stbuf.ia_atime = sh->buf[sh->source].ia_atime; - stbuf.ia_atime_nsec = sh->buf[sh->source].ia_atime_nsec; - stbuf.ia_mtime = sh->buf[sh->source].ia_mtime; - stbuf.ia_mtime_nsec = sh->buf[sh->source].ia_mtime_nsec; + if (op_ret == -1) + gf_log (this->name, GF_LOG_ERROR, + "create entry %s failed, on child %d reason, %s", + local->loc.path, child, strerror (op_errno)); + call_count = afr_frame_return (frame); + if (call_count == 0) + afr_sh_missing_entries_finish (frame, this); + return 0; +} - stbuf.ia_uid = sh->buf[sh->source].ia_uid; - stbuf.ia_gid = sh->buf[sh->source].ia_gid; +static int +sh_missing_entries_create (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int type = 0; + afr_private_t *priv = NULL; + int 
enoent_count = 0; + int i = 0; + struct iatt *buf = NULL; + struct iatt *postparent = NULL; - valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID | - GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; + local = frame->local; + sh = &local->self_heal; + priv = this->private; - if (op_ret == -1) { + enoent_count = afr_errno_count (NULL, sh->child_errno, + priv->child_count, ENOENT); + if (enoent_count == 0) { gf_log (this->name, GF_LOG_INFO, - "%s: failed to mknod on %s (%s)", - local->loc.path, priv->children[child_index]->name, - strerror (op_errno)); + "no missing files - %s. proceeding to metadata check", + local->loc.path); + /* proceed to next step - metadata self-heal */ + afr_sh_missing_entries_finish (frame, this); + return 0; } - if (op_ret == 0) { - setattr_frame = copy_frame (frame); - - setattr_frame->local = GF_CALLOC (1, sizeof (afr_local_t), - gf_afr_mt_afr_local_t); + buf = &sh->buf[sh->source]; + postparent = &sh->parentbufs[sh->source]; - ((afr_local_t *)setattr_frame->local)->call_count = 2; - - gf_log (this->name, GF_LOG_TRACE, - "setattr (%s) on subvolume %s", - local->loc.path, priv->children[child_index]->name); - - STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk, - (void *) (long) 0, - priv->children[child_index], - priv->children[child_index]->fops->setattr, - &local->loc, &stbuf, valid); - - valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; - parent_loc = GF_CALLOC (1, sizeof (*parent_loc), - gf_afr_mt_loc_t); - afr_build_parent_loc (parent_loc, &local->loc); - - STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk, - (void *) (long) parent_loc, - priv->children[child_index], - priv->children[child_index]->fops->setattr, - parent_loc, &sh->parentbuf, valid); + type = buf->ia_type; + if (!afr_valid_ia_type (type)) { + gf_log (this->name, GF_LOG_ERROR, + "%s: unknown file type: 0%o", local->loc.path, type); + local->govinda_gOvinda = 1; + afr_sh_missing_entries_finish (frame, this); + goto out; } - call_count = afr_frame_return (frame); - - if (call_count == 0) { - 
sh_missing_entries_finish (frame, this); + local->call_count = enoent_count; + for (i = 0; i < priv->child_count; i++) { + //If !child_up errno will be zero + if (sh->child_errno[i] != ENOENT) + continue; + afr_sh_call_entry_impunge_recreate (frame, this, i, + buf, postparent, + afr_sh_create_entry_cbk); + enoent_count--; } - + GF_ASSERT (enoent_count == 0); +out: return 0; } - -static int -sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this) +void +afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; afr_private_t *priv = NULL; - int i = 0; - int ret = 0; - int enoent_count = 0; - int call_count = 0; - mode_t st_mode = 0; - dev_t ia_rdev = 0; - dict_t *dict = NULL; - dev_t st_rdev = 0; + int32_t op_errno = 0; + ia_type_t ia_type = IA_INVAL; + int32_t nsources = 0; local = frame->local; sh = &local->self_heal; priv = this->private; - for (i = 0; i < priv->child_count; i++) - if (sh->child_errno[i] == ENOENT) - enoent_count++; + if (afr_get_children_count (sh->child_success, + priv->child_count) == 0) { + op_errno = afr_resultant_errno_get (NULL, sh->child_errno, + priv->child_count); + goto out; + } - call_count = enoent_count; - local->call_count = call_count; + if (afr_gfid_missing_count (this->name, sh->child_success, + sh->buf, priv->child_count, + local->loc.path) || + afr_conflicting_iattrs (sh->buf, sh->child_success, + priv->child_count, local->loc.path, + this->name)) { + //this can happen if finding the fresh parent dir failed + local->govinda_gOvinda = 1; + sh->op_failed = 1; + op_errno = EIO; + goto out; + } - st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot, - sh->buf[sh->source].ia_type); - ia_rdev = sh->buf[sh->source].ia_rdev; - st_rdev = makedev (ia_major (ia_rdev), ia_minor (ia_rdev)); + //now No chance for the ia_type to conflict + ia_type = sh->buf[sh->child_success[0]].ia_type; + nsources = afr_build_sources (this, sh->xattr, sh->buf, + 
sh->pending_matrix, sh->sources, + sh->child_success, + afr_transaction_type_get (ia_type)); + if (nsources < 0) { + gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s," + " in missing entry self-heal, continuing with the rest" + " of the self-heals", local->loc.path); + op_errno = EIO; + goto out; + } - gf_log (this->name, GF_LOG_TRACE, - "mknod %s mode 0%o device type %"PRId64" on %d subvolumes", - local->loc.path, st_mode, (uint64_t)st_rdev, enoent_count); + afr_get_fresh_children (sh->child_success, sh->sources, + sh->fresh_children, priv->child_count); + sh->source = sh->fresh_children[0]; + if (sh->source == -1) { + gf_log (this->name, GF_LOG_DEBUG, "No active sources found."); + op_errno = EIO; + goto out; + } - dict = dict_new (); - if (!dict) - gf_log (this->name, GF_LOG_ERROR, "out of memory"); + if (sh->gfid_sh_success_cbk) + sh->gfid_sh_success_cbk (frame, this); + sh_missing_entries_create (frame, this); + return; +out: + afr_sh_set_error (sh, op_errno); + afr_sh_missing_entries_finish (frame, this); + return; +} - ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid); - if (ret) - gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed", - local->loc.path); +static int +afr_sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xattr, + struct iatt *postparent) +{ + int call_count = 0; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; - for (i = 0; i < priv->child_count; i++) { - if (sh->child_errno[i] == ENOENT) { - STACK_WIND_COOKIE (frame, - sh_missing_entries_newentry_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->mknod, - &local->loc, st_mode, st_rdev, dict); - if (!--call_count) - break; - } - } + local = frame->local; + sh = &local->self_heal; + priv = this->private; - if (dict) - dict_unref (dict); + afr_sh_common_lookup_resp_handler (frame, cookie, this, 
op_ret, + op_errno, inode, buf, xattr, + postparent); + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_missing_entries_lookup_done (frame, this); return 0; } - -static int -sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this) +int +afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child, + int32_t op_ret, int32_t op_errno) { + int call_count = 0; afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - dict_t *dict = NULL; - int i = 0; - int ret = 0; - int enoent_count = 0; - int call_count = 0; - mode_t st_mode = 0; local = frame->local; sh = &local->self_heal; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) - if (sh->child_errno[i] == ENOENT) - enoent_count++; - call_count = enoent_count; - local->call_count = call_count; - - st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot, - sh->buf[sh->source].ia_type); - - dict = dict_new (); - if (!dict) { + GF_ASSERT (sh->post_remove_call); + if ((op_ret == -1) && (op_errno != ENOENT)) { gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - sh_missing_entries_finish (frame, this); - return 0; + "purge entry %s failed, on child %d reason, %s", + local->loc.path, child, strerror (op_errno)); + LOCK (&frame->lock); + { + afr_sh_set_error (sh, EIO); + sh->op_failed = 1; + } + UNLOCK (&frame->lock); } + call_count = afr_frame_return (frame); + if (call_count == 0) + sh->post_remove_call (frame, this); + return 0; +} - ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid); - if (ret) - gf_log (this->name, GF_LOG_INFO, - "%s: inode gfid set failed", local->loc.path); +void +afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this, + int child_index, struct iatt *buf, + afr_expunge_done_cbk_t expunge_done) +{ + call_frame_t *expunge_frame = NULL; + afr_local_t *local = NULL; + afr_local_t *expunge_local = NULL; + afr_self_heal_t *sh = NULL; + afr_self_heal_t *expunge_sh = NULL; + int32_t op_errno = 0; + 
expunge_frame = copy_frame (frame); + if (!expunge_frame) { + goto out; + } - gf_log (this->name, GF_LOG_TRACE, - "mkdir %s mode 0%o on %d subvolumes", - local->loc.path, st_mode, enoent_count); + ALLOC_OR_GOTO (expunge_local, afr_local_t, out); - for (i = 0; i < priv->child_count; i++) { - if (sh->child_errno[i] == ENOENT) { - if (!strcmp (local->loc.path, "/")) { - /* We shouldn't try to create "/" */ + local = frame->local; + sh = &local->self_heal; + expunge_frame->local = expunge_local; + expunge_sh = &expunge_local->self_heal; + expunge_sh->sh_frame = frame; + loc_copy (&expunge_local->loc, &local->loc); + sh->expunge_done = expunge_done; + afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf); + return; +out: + gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s", + local->loc.path, strerror (op_errno)); + expunge_done (frame, this, child_index, -1, op_errno); +} - sh_missing_entries_finish (frame, this); +void +afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children, + int32_t *fresh_children, + unsigned int child_count) +{ + int i = 0; - return 0; - } else { - STACK_WIND_COOKIE (frame, - sh_missing_entries_newentry_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->mkdir, - &local->loc, st_mode, dict); - if (!--call_count) - break; - } + for (i = 0; i < child_count; i++) { + if (afr_is_child_present (success_children, child_count, i) && + !afr_is_child_present (fresh_children, child_count, i)) { + sh->child_errno[i] = ENOENT; + GF_ASSERT (sh->xattr[i]); + dict_unref (sh->xattr[i]); + sh->xattr[i] = NULL; } } +} - if (dict) - dict_unref (dict); +int +afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + if (sh->op_failed) { + afr_sh_missing_entries_finish (frame, this); + } else { + if 
(afr_gfid_missing_count (this->name, sh->fresh_children, + sh->buf, priv->child_count, + local->loc.path)) { + afr_sh_common_lookup (frame, this, &local->loc, + afr_sh_missing_entries_lookup_cbk, + _gf_true); + } else { + //No need to set gfid so goto missing entries lookup done + //Behave as if you have done the lookup + afr_sh_remove_stale_lookup_info (sh, + sh->child_success, + sh->fresh_children, + priv->child_count); + afr_children_copy (sh->child_success, + sh->fresh_children, + priv->child_count); + afr_sh_missing_entries_lookup_done (frame, this); + } + } return 0; } - -static int -sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this, - const char *link, struct iatt *buf) +gf_boolean_t +afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv, + int child) { - afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - dict_t *dict = NULL; - int i = 0; - int ret = 0; - int enoent_count = 0; - int call_count = 0; - - local = frame->local; sh = &local->self_heal; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) - if (sh->child_errno[i] == ENOENT) - enoent_count++; - call_count = enoent_count; - local->call_count = call_count; + if (local->child_up[child] && + (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count, + child)) + && (sh->child_errno[child] != ENOENT)) + return _gf_true; - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - sh_missing_entries_finish (frame, this); - return 0; - } + return _gf_false; +} - ret = afr_set_dict_gfid (dict, buf->ia_gfid); - if (ret) - gf_log (this->name, GF_LOG_DEBUG, - "%s: dict gfid set failed", local->loc.path); +gf_boolean_t +afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv, + int child) +{ + afr_self_heal_t *sh = NULL; - gf_log (this->name, GF_LOG_TRACE, - "symlink %s -> %s on %d subvolumes", - local->loc.path, link, enoent_count); + sh = &local->self_heal; - for (i = 
0; i < priv->child_count; i++) { - if (sh->child_errno[i] == ENOENT) { - STACK_WIND_COOKIE (frame, - sh_missing_entries_newentry_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->symlink, - link, &local->loc, dict); - if (!--call_count) - break; - } - } + if (local->child_up[child] && + (!afr_is_child_present (sh->fresh_children, priv->child_count, + child)) + && (sh->child_errno[child] != ENOENT)) + return _gf_true; - return 0; + return _gf_false; } - -static int -sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - const char *link, struct iatt *sbuf) +void +afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this, + gf_boolean_t purge_condition (afr_local_t *local, + afr_private_t *priv, + int child)) { afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; afr_private_t *priv = NULL; + afr_self_heal_t *sh = NULL; + int i = 0; + int call_count = 0; local = frame->local; sh = &local->self_heal; priv = this->private; - if (op_ret > 0) - sh_missing_entries_symlink (frame, this, link, sbuf); - else { - gf_log (this->name, GF_LOG_INFO, - "%s: failed to do readlink on %s (%s)", - local->loc.path, priv->children[sh->source]->name, - strerror (op_errno)); - sh_missing_entries_finish (frame, this); + for (i = 0; i < priv->child_count; i++) { + if (purge_condition (local, priv, i)) + call_count++; } - return 0; -} + if (call_count == 0) { + sh->post_remove_call (frame, this); + goto out; + } + local->call_count = call_count; + for (i = 0; i < priv->child_count; i++) { + if (!purge_condition (local, priv, i)) + continue; + afr_sh_call_entry_expunge_remove (frame, this, + (long) i, &sh->buf[i], + afr_sh_remove_entry_cbk); + } +out: + return; +} -static int -sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this) +void +afr_sh_purge_entry (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; 
local = frame->local; sh = &local->self_heal; - priv = this->private; + sh->post_remove_call = afr_sh_missing_entries_finish; - STACK_WIND (frame, sh_missing_entries_readlink_cbk, - priv->children[sh->source], - priv->children[sh->source]->fops->readlink, - &local->loc, 4096); - - return 0; + afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition); } - -static int -sh_missing_entries_create (call_frame_t *frame, xlator_t *this) +void +afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; - int type = 0; - int i = 0; afr_private_t *priv = NULL; - int enoent_count = 0; - int govinda_gOvinda = 0; + int i = 0; local = frame->local; sh = &local->self_heal; priv = this->private; + sh->post_remove_call = afr_sh_purge_stale_entries_done; + for (i = 0; i < priv->child_count; i++) { - if (!local->child_up[i]) + if (afr_is_child_present (sh->fresh_children, + priv->child_count, i)) continue; - if (sh->child_errno[i]) { - if (sh->child_errno[i] == ENOENT) - enoent_count++; - } else { - if (type) { - if (type != sh->buf[i].ia_type) { - gf_log (this->name, GF_LOG_DEBUG, - "file %s is not recoverable " - "automatically!", - local->loc.path); - - govinda_gOvinda = 1; - } - } else { - sh->source = i; - type = sh->buf[i].ia_type; - } - } - } + if ((!local->child_up[i]) || sh->child_errno[i] != 0) + continue; - if (govinda_gOvinda) { - gf_log (this->name, GF_LOG_ERROR, - "conflicting filetypes exist for path %s. returning.", - local->loc.path); + GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) || + uuid_is_null (sh->buf[i].ia_gfid)); - local->govinda_gOvinda = 1; - sh_missing_entries_finish (frame, this); - return 0; - } + if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) || + (uuid_compare (sh->buf[i].ia_gfid, + sh->entrybuf.ia_gfid))) + continue; + + afr_fresh_children_add_child (sh->fresh_children, + i, priv->child_count); - if (!type) { - gf_log (this->name, GF_LOG_ERROR, - "no source found for %s. 
all nodes down?. returning.", - local->loc.path); - /* subvolumes down and/or file does not exist */ - sh_missing_entries_finish (frame, this); - return 0; } + afr_sh_purge_entry_common (frame, this, + afr_sh_purge_stale_entry_condition); +} - if (enoent_count == 0) { - gf_log (this->name, GF_LOG_INFO, - "no missing files - %s. proceeding to metadata check", - local->loc.path); - /* proceed to next step - metadata self-heal */ - sh_missing_entries_finish (frame, this); - return 0; +void +afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs, + struct iatt *save, + unsigned int child_count) +{ + int i = 0; + int child = 0; + gf_boolean_t saved = _gf_false; + + GF_ASSERT (save); + //if iatt buf with gfid exists sets it + for (i = 0; i < child_count; i++) { + child = children[i]; + if (child == -1) + break; + *save = bufs[child]; + saved = _gf_true; + if (!uuid_is_null (save->ia_gfid)) + break; } + GF_ASSERT (saved); +} - switch (type) { - case IA_IFSOCK: - case IA_IFREG: - case IA_IFBLK: - case IA_IFCHR: - case IA_IFIFO: - sh_missing_entries_mknod (frame, this); - break; - case IA_IFLNK: - sh_missing_entries_readlink (frame, this); - break; - case IA_IFDIR: - sh_missing_entries_mkdir (frame, this); - break; - default: - gf_log (this->name, GF_LOG_ERROR, - "%s: unknown file type: 0%o", local->loc.path, type); +void +afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int32_t fresh_child_enoents = 0; + int32_t fresh_parent_count = 0; + int32_t op_errno = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + if (afr_get_children_count (sh->child_success, + priv->child_count) == 0) { + op_errno = afr_resultant_errno_get (NULL, sh->child_errno, + priv->child_count); + goto fail; + } + + //make intersection of (success_children & fresh_parent_dirs) fresh_children + //the other success_children will be added to it if 
they are not stale + afr_children_intersection_get (sh->child_success, + sh->fresh_parent_dirs, + sh->sources, priv->child_count); + afr_get_fresh_children (sh->child_success, sh->sources, + sh->fresh_children, priv->child_count); + memset (sh->sources, 0, sizeof (*sh->sources) * priv->child_count); + + fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs, + priv->child_count); + //we need the enoent count of the subvols present in fresh_parent_dirs + fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs, + sh->child_errno, + priv->child_count, ENOENT); + if (fresh_child_enoents == fresh_parent_count) { + afr_sh_set_error (sh, ENOENT); + sh->op_failed = 1; + afr_sh_purge_entry (frame, this); + } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children, + priv->child_count, local->loc.path, + this->name)) { + afr_sh_save_child_iatts_from_policy (sh->fresh_children, + sh->buf, &sh->entrybuf, + priv->child_count); + afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf, + sh->fresh_children, + priv->child_count); + afr_sh_purge_stale_entry (frame, this); + } else { + op_errno = EIO; local->govinda_gOvinda = 1; - sh_missing_entries_finish (frame, this); + goto fail; } - return 0; -} + return; +fail: + afr_sh_set_error (sh, op_errno); + afr_sh_missing_entries_finish (frame, this); + return; +} static int -sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *xattr, - struct iatt *postparent) +afr_sh_children_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xattr, + struct iatt *postparent) { - int child_index = 0; - afr_local_t *local = NULL; int call_count = 0; + + afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, + op_errno, inode, buf, xattr, + postparent); + call_count = afr_frame_return (frame); + + if (call_count == 0) + 
afr_sh_children_lookup_done (frame, this); + + return 0; +} + +static int +afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this) +{ + afr_self_heal_t *sh = NULL; afr_private_t *priv = NULL; - mode_t st_mode = 0; + afr_local_t *local = NULL; + int enoent_count = 0; + int nsources = 0; + int source = -1; local = frame->local; + sh = &local->self_heal; priv = this->private; - child_index = (long) cookie; - - if (buf) - st_mode = st_mode_from_ia (buf->ia_prot, buf->ia_type); + /* If We can't find a fresh parent directory here, + * we wont know which subvol is correct without finding a parent dir + * upwards which has correct xattrs, for that we may have to + * do lookups till root, we dont wanna do that, + * instead make sure that if there are conflicting gfid + * parent dirs, self-heal thus lookup is failed with EIO. + * if there are missing entries we dont know whether to delete or + * create so fail with EIO, + * If there are conflicting xattr fail with EIO. + */ + if (afr_get_children_count (sh->child_success, + priv->child_count) == 0) { + gf_log (this->name, GF_LOG_ERROR, "Parent dir lookup failed " + "for %s, in missing entry self-heal, continuing with " + "the rest of the self-heals", local->loc.path); + goto out; + } - LOCK (&frame->lock); - { - if (op_ret == 0) { - gf_log (this->name, GF_LOG_TRACE, - "path %s on subvolume %s is of mode 0%o", - local->loc.path, - priv->children[child_index]->name, - st_mode); + enoent_count = afr_errno_count (NULL, sh->child_errno, + priv->child_count, ENOENT); + if (enoent_count > 0) { + gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s," + " in missing entry self-heal, continuing with the rest" + " of the self-heals", local->loc.path); + goto out; + } - local->self_heal.buf[child_index] = *buf; - local->self_heal.parentbuf = *postparent; - } else { - gf_log (this->name, GF_LOG_INFO, - "path %s on subvolume %s => -1 (%s)", - local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); + if 
(afr_conflicting_iattrs (sh->buf, sh->child_success, + priv->child_count, sh->parent_loc.path, + this->name)) { + gf_log (this->name, GF_LOG_INFO, "conflicting stat info for " + "parent dirs of %s", local->loc.path); + goto out; + } - local->self_heal.child_errno[child_index] = op_errno; - } + nsources = afr_build_sources (this, sh->xattr, sh->buf, + sh->pending_matrix, sh->sources, + sh->child_success, + AFR_ENTRY_TRANSACTION); + if (nsources < 0) { + gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s," + " in missing entry self-heal, continuing with the rest" + " of the self-heals", local->loc.path); + goto out; + } + source = afr_sh_select_source (sh->sources, priv->child_count); + if (source == -1) { + GF_ASSERT (0); + gf_log (this->name, GF_LOG_DEBUG, "No active sources found."); + goto out; } - UNLOCK (&frame->lock); + afr_get_fresh_children (sh->child_success, sh->sources, + sh->fresh_parent_dirs, priv->child_count); + afr_sh_common_lookup (frame, this, &local->loc, + afr_sh_children_lookup_cbk, _gf_false); + return 0; + +out: + afr_sh_set_error (sh, EIO); + sh->op_failed = 1; + afr_sh_missing_entries_finish (frame, this); + return 0; +} +int +afr_sh_conflicting_entry_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, + dict_t *xattr, struct iatt *postparent) +{ + int call_count = 0; + + afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, + op_errno, inode, buf, xattr, + postparent); call_count = afr_frame_return (frame); - if (call_count == 0) { - sh_missing_entries_create (frame, this); - } + if (call_count == 0) + afr_sh_find_fresh_parents (frame, this); return 0; } +void +afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count) +{ + int i = 0; -static int -sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this) + for (i = 0; i < child_count; i++) { + memset (&sh->buf[i], 0, sizeof (sh->buf[i])); + memset (&sh->parentbufs[i], 0, 
sizeof (sh->parentbufs[i])); + sh->child_errno[i] = 0; + } + memset (&sh->parentbuf, 0, sizeof (sh->parentbuf)); + sh->success_count = 0; + afr_reset_children (sh->child_success, child_count); + afr_reset_children (sh->fresh_children, child_count); + afr_reset_xattr (sh->xattr, child_count); +} + +/* afr self-heal state will be lost if this call is made + * please check the afr_sh_common_reset that is called in this function + */ +int +afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid) { afr_local_t *local = NULL; int i = 0; int call_count = 0; afr_private_t *priv = NULL; dict_t *xattr_req = NULL; - int ret = -1; + afr_self_heal_t *sh = NULL; local = frame->local; priv = this->private; + sh = &local->self_heal; call_count = afr_up_children_count (priv->child_count, local->child_up); @@ -1445,29 +1724,29 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this) xattr_req = dict_new(); if (xattr_req) { - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_uint64 (xattr_req, - priv->pending_key[i], - 3 * sizeof(int32_t)); - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set value for %s", - local->loc.path, priv->pending_key[i]); + afr_xattr_req_prepare (this, xattr_req, loc->path); + if (set_gfid) { + gf_log (this->name, GF_LOG_DEBUG, + "looking up %s with gfid: %s", + local->loc.path, uuid_utoa (sh->sh_gfid_req)); + GF_ASSERT (!uuid_is_null (sh->sh_gfid_req)); + afr_set_dict_gfid (xattr_req, sh->sh_gfid_req); } } + afr_sh_common_reset (sh, priv->child_count); for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_DEBUG, "looking up %s on subvolume %s", local->loc.path, priv->children[i]->name); STACK_WIND_COOKIE (frame, - sh_missing_entries_lookup_cbk, + lookup_cbk, (void *) (long) i, priv->children[i], priv->children[i]->fops->lookup, - &local->loc, xattr_req); + loc, xattr_req); 
if (!--call_count) break; @@ -1483,13 +1762,15 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this) int -afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) +afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; local = frame->local; int_lock = &local->internal_lock; + sh = &local->self_heal; if (int_lock->lock_op_ret < 0) { gf_log (this->name, GF_LOG_INFO, @@ -1499,14 +1780,41 @@ afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) gf_log (this->name, GF_LOG_DEBUG, "Non blocking entrylks done. Proceeding to FOP"); - sh_missing_entries_lookup (frame, this); + afr_sh_common_lookup (frame, this, &sh->parent_loc, + afr_sh_conflicting_entry_lookup_cbk, + _gf_false); } return 0; } -static int -afr_sh_entrylk (call_frame_t *frame, xlator_t *this) +int +afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + + local = frame->local; + int_lock = &local->internal_lock; + + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_INFO, + "Non blocking entrylks failed."); + afr_sh_missing_entries_done (frame, this); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking entrylks done. 
Proceeding to FOP"); + afr_sh_common_lookup (frame, this, &local->loc, + afr_sh_missing_entries_lookup_cbk, + _gf_true); + } + + return 0; +} + +int +afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc, + char *base_name, afr_lock_cbk_t lock_cbk) { afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; @@ -1521,9 +1829,9 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this) afr_set_lock_number (frame, this); - int_lock->lk_basename = local->loc.name; - int_lock->lk_loc = &sh->parent_loc; - int_lock->lock_cbk = afr_sh_post_nonblocking_entrylk_cbk; + int_lock->lk_basename = base_name; + int_lock->lk_loc = loc; + int_lock->lock_cbk = lock_cbk; afr_nonblocking_entrylk (frame, this); @@ -1531,7 +1839,8 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this) } static int -afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) +afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this, + afr_lock_cbk_t lock_cbk) { afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; @@ -1547,9 +1856,27 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) "attempting to recreate missing entries for path=%s", local->loc.path); + GF_ASSERT (local->loc.parent); afr_build_parent_loc (&sh->parent_loc, &local->loc); - afr_sh_entrylk (frame, this); + afr_sh_entrylk (frame, this, &sh->parent_loc, NULL, + lock_cbk); + return 0; +} + +static int +afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this) +{ + afr_self_heal_parent_entrylk (frame, this, + afr_sh_post_nb_entrylk_conflicting_sh_cbk); + return 0; +} + +static int +afr_self_heal_gfids (call_frame_t *frame, xlator_t *this) +{ + afr_self_heal_parent_entrylk (frame, this, + afr_sh_post_nb_entrylk_gfid_sh_cbk); return 0; } @@ -1572,6 +1899,9 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this) shc = &lc->self_heal; shc->unwind = sh->unwind; + shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk; + shc->need_missing_entry_self_heal = 
sh->need_missing_entry_self_heal; + shc->need_gfid_self_heal = sh->need_gfid_self_heal; shc->need_data_self_heal = sh->need_data_self_heal; shc->need_metadata_self_heal = sh->need_metadata_self_heal; shc->need_entry_self_heal = sh->need_entry_self_heal; @@ -1585,6 +1915,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this) shc->background = sh->background; shc->type = sh->type; + uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req); if (l->loc.path) loc_copy (&lc->loc, &l->loc); @@ -1640,18 +1971,16 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) afr_local_t * local = NULL; afr_self_heal_t * sh = NULL; char sh_type_str[256] = {0,}; + gf_boolean_t split_brain = _gf_false; priv = this->private; local = bgsh_frame->local; sh = &local->self_heal; - if (local->govinda_gOvinda) { - afr_set_split_brain (this, local->cont.lookup.inode, - _gf_true); - } else { - afr_set_split_brain (this, local->cont.lookup.inode, - _gf_false); - } + if (local->govinda_gOvinda) + split_brain = _gf_true; + + afr_set_split_brain (this, sh->inode, split_brain); afr_self_heal_type_str_get(sh, sh_type_str, sizeof(sh_type_str)); @@ -1668,7 +1997,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) FRAME_SU_UNDO (bgsh_frame, afr_local_t); if (!sh->unwound) { - sh->unwind (sh->orig_frame, this); + sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno); } if (sh->background) { @@ -1685,7 +2014,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) } int -afr_self_heal (call_frame_t *frame, xlator_t *this) +afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode) { afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; @@ -1730,11 +2059,14 @@ afr_self_heal (call_frame_t *frame, xlator_t *this) sh = &sh_local->self_heal; sh->orig_frame = frame; + sh->inode = inode_ref (inode); sh->completion_cbk = afr_self_heal_completion_cbk; sh->buf = GF_CALLOC (priv->child_count, sizeof (struct iatt), gf_afr_mt_iatt); + 
sh->parentbufs = GF_CALLOC (priv->child_count, sizeof (struct iatt), + gf_afr_mt_iatt); sh->child_errno = GF_CALLOC (priv->child_count, sizeof (int), gf_afr_mt_int); sh->success = GF_CALLOC (priv->child_count, sizeof (int), @@ -1763,13 +2095,17 @@ afr_self_heal (call_frame_t *frame, xlator_t *this) priv->child_count, gf_afr_mt_int32_t); } - sh->child_success = GF_CALLOC (sizeof (*sh->child_success), - priv->child_count, gf_afr_mt_int32_t); + sh->child_success = afr_fresh_children_create (priv->child_count); + sh->fresh_children = afr_fresh_children_create (priv->child_count); + sh->fresh_parent_dirs = afr_fresh_children_create (priv->child_count); FRAME_SU_DO (sh_frame, afr_local_t); - if (local->success_count && local->enoent_count) { - afr_self_heal_missing_entries (sh_frame, this); + if (sh->need_missing_entry_self_heal) { + afr_self_heal_conflicting_entries (sh_frame, this); + } else if (sh->need_gfid_self_heal) { + GF_ASSERT (!uuid_is_null (sh->sh_gfid_req)); + afr_self_heal_gfids (sh_frame, this); } else { gf_log (this->name, GF_LOG_TRACE, "proceeding to metadata check on %s", @@ -1785,18 +2121,28 @@ void afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str, size_t size) { - GF_ASSERT (str && (size > strlen (" meta-data data entry"))); + GF_ASSERT (str && (size > strlen (" missing-entry gfid " + "meta-data data entry"))); if (self_heal_p->need_metadata_self_heal) { - snprintf(str, size, " meta-data"); + snprintf (str, size, " meta-data"); } if (self_heal_p->need_data_self_heal) { - snprintf(str + strlen(str), size - strlen(str), " data"); + snprintf (str + strlen(str), size - strlen(str), " data"); } if (self_heal_p->need_entry_self_heal) { - snprintf(str + strlen(str), size - strlen(str), " entry"); + snprintf (str + strlen(str), size - strlen(str), " entry"); + } + + if (self_heal_p->need_missing_entry_self_heal) { + snprintf (str + strlen(str), size - strlen(str), + " missing-entry"); + } + + if (self_heal_p->need_gfid_self_heal) { + 
snprintf (str + strlen(str), size - strlen(str), " gfid"); } } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 676e933ae1c..043ebea2da6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -29,6 +29,11 @@ typedef enum { AFR_SELF_HEAL_INVALID = -1, } afr_self_heal_type; +typedef int +(*afr_lookup_cbk_t) (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xattr, + struct iatt *postparent); int afr_sh_select_source (int sources[], int child_count); @@ -71,4 +76,23 @@ afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str, afr_self_heal_type afr_self_heal_type_for_transaction (afr_transaction_type type); +int +afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs, + int32_t **pending_matrix, int32_t *sources, + int32_t *success_children, afr_transaction_type type); +void +afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count); +int +afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid); +int +afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this, + int active_src, struct iatt *buf); +int +afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc, + char *base_name, afr_lock_cbk_t lock_cbk); +int +afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this, + int child_index, struct iatt *buf, + struct iatt *postparent); #endif /* __AFR_SELF_HEAL_COMMON_H__ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 38799db7042..6ce7106698d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -282,7 +282,7 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this) local = frame->local; sh = 
&local->self_heal; - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_DEBUG, "finishing data selfheal of %s", local->loc.path); if (!sh->data_lock_held) @@ -605,7 +605,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) sh->child_success, this->name); if (nsources == 0) { - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_DEBUG, "No self-heal needed for %s", local->loc.path); @@ -682,7 +682,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) orig_local->cont.lookup.xattr = dict_ref (orig_local->cont.lookup.xattrs[sh->source]); if (sh->background) { - sh->unwind (sh->orig_frame, this); + sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno); sh->unwound = _gf_true; } @@ -793,7 +793,6 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local, afr_private_t *priv = NULL; int read_child = -1; int ret = -1; - afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID; int32_t **pending_matrix = NULL; int32_t *sources = NULL; int32_t *valid_children = NULL; @@ -802,6 +801,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local, int32_t prev_read_child = -1; int32_t config_read_child = -1; afr_self_heal_t *sh = NULL; + afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID; priv = this->private; bufs = local->cont.lookup.bufs; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 8c619ff45a5..3977ae101f6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -49,9 +49,6 @@ #include "afr-self-heal.h" #include "afr-self-heal-common.h" -int -afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this); - int afr_sh_entry_done (call_frame_t *frame, xlator_t *this) { @@ -362,7 +359,8 @@ afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this, int afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this, - int active_src) + int active_src, 
int32_t op_ret, + int32_t op_errno) { int call_count = 0; @@ -385,21 +383,25 @@ afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame, afr_self_heal_t *expunge_sh = NULL; call_frame_t *frame = NULL; int active_src = (long) cookie; + afr_self_heal_t *sh = NULL; + afr_local_t *local = NULL; priv = this->private; expunge_local = expunge_frame->local; expunge_sh = &expunge_local->self_heal; frame = expunge_sh->sh_frame; + local = frame->local; + sh = &local->self_heal; if (op_ret != 0) { - gf_log (this->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_ERROR, "setattr on parent directory of %s on subvolume %s failed: %s", expunge_local->loc.path, priv->children[active_src]->name, strerror (op_errno)); } AFR_STACK_DESTROY (expunge_frame); - afr_sh_entry_expunge_entry_done (frame, this, active_src); + sh->expunge_done (frame, this, active_src, op_ret, op_errno); return 0; } @@ -510,15 +512,17 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this, afr_private_t *priv = NULL; afr_local_t *expunge_local = NULL; afr_self_heal_t *expunge_sh = NULL; - int source = 0; call_frame_t *frame = NULL; int type = 0; + afr_self_heal_t *sh = NULL; + afr_local_t *local = NULL; priv = this->private; expunge_local = expunge_frame->local; expunge_sh = &expunge_local->self_heal; frame = expunge_sh->sh_frame; - source = expunge_sh->source; + local = frame->local; + sh = &local->self_heal; type = buf->ia_type; @@ -538,7 +542,7 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this, gf_log (this->name, GF_LOG_ERROR, "%s has unknown file type on %s: 0%o", expunge_local->loc.path, - priv->children[source]->name, type); + priv->children[active_src]->name, type); goto out; break; } @@ -546,7 +550,7 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this, return 0; out: AFR_STACK_DESTROY (expunge_frame); - afr_sh_entry_expunge_entry_done (frame, this, active_src); + sh->expunge_done (frame, this, active_src, -1, EINVAL); 
return 0; } @@ -564,15 +568,19 @@ afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie, afr_self_heal_t *expunge_sh = NULL; call_frame_t *frame = NULL; int active_src = 0; + afr_self_heal_t *sh = NULL; + afr_local_t *local = NULL; priv = this->private; expunge_local = expunge_frame->local; expunge_sh = &expunge_local->self_heal; frame = expunge_sh->sh_frame; active_src = (long) cookie; + local = frame->local; + sh = &local->self_heal; if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_ERROR, "lookup of %s on %s failed (%s)", expunge_local->loc.path, priv->children[active_src]->name, @@ -585,7 +593,7 @@ afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie, return 0; out: AFR_STACK_DESTROY (expunge_frame); - afr_sh_entry_expunge_entry_done (frame, this, active_src); + sh->expunge_done (frame, this, active_src, op_ret, op_errno); return 0; } @@ -628,7 +636,8 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie, call_frame_t *frame = NULL; int active_src = 0; int need_expunge = 0; - + afr_self_heal_t *sh = NULL; + afr_local_t *local = NULL; priv = this->private; expunge_local = expunge_frame->local; @@ -636,6 +645,8 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie, frame = expunge_sh->sh_frame; active_src = expunge_sh->active_source; source = (long) cookie; + local = frame->local; + sh = &local->self_heal; if (op_ret == -1 && op_errno == ENOENT) need_expunge = 1; @@ -685,7 +696,7 @@ out: } AFR_STACK_DESTROY (expunge_frame); - afr_sh_entry_expunge_entry_done (frame, this, active_src); + sh->expunge_done (frame, this, active_src, op_ret, op_errno); return 0; } @@ -706,6 +717,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, int source = 0; int op_errno = 0; char *name = NULL; + int op_ret = -1; priv = this->private; local = frame->local; @@ -713,6 +725,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, 
active_src = sh->active_source; source = sh->source; + sh->expunge_done = afr_sh_entry_expunge_entry_done; name = entry->d_name; @@ -724,6 +737,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_TRACE, "skipping inspection of %s under %s", name, local->loc.path); + op_ret = 0; goto out; } @@ -733,6 +747,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, expunge_frame = copy_frame (frame); if (!expunge_frame) { + op_errno = ENOMEM; goto out; } @@ -746,6 +761,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, ret = build_child_loc (this, &expunge_local->loc, &local->loc, name); if (ret != 0) { + op_errno = EINVAL; goto out; } @@ -763,7 +779,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, ret = 0; out: if (ret == -1) - afr_sh_entry_expunge_entry_done (frame, this, active_src); + sh->expunge_done (frame, this, active_src, op_ret, op_errno); return 0; } @@ -896,7 +912,8 @@ out: int afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this, - int active_src) + int active_src, int32_t op_ret, + int32_t op_errno) { int call_count = 0; @@ -924,6 +941,7 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie, call_frame_t *frame = NULL; int active_src = 0; int child_index = 0; + int32_t impunge_ret_child = 0; priv = this->private; impunge_local = impunge_frame->local; @@ -931,7 +949,7 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie, frame = impunge_sh->sh_frame; local = frame->local; sh = &local->self_heal; - active_src = sh->active_source; + active_src = impunge_sh->active_source; child_index = (long) cookie; if (op_ret == 0) { @@ -954,8 +972,10 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie, UNLOCK (&impunge_frame->lock); if (call_count == 0) { + impunge_ret_child = impunge_sh->impunge_ret_child; AFR_STACK_DESTROY (impunge_frame); - afr_sh_entry_impunge_entry_done (frame, 
this, active_src); + sh->impunge_done (frame, this, impunge_ret_child, op_ret, + op_errno); } return 0; @@ -972,9 +992,8 @@ afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie, afr_local_t *impunge_local = NULL; afr_self_heal_t *impunge_sh = NULL; int child_index = 0; - - struct iatt stbuf; - int32_t valid = 0; + struct iatt stbuf = {0}; + int32_t valid = 0; priv = this->private; impunge_local = impunge_frame->local; @@ -1066,6 +1085,7 @@ afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie, int32_t valid = 0; loc_t *parent_loc = NULL; struct iatt parentbuf = {0,}; + int32_t impunge_ret_child = 0; priv = this->private; impunge_local = impunge_frame->local; @@ -1073,7 +1093,7 @@ afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie, frame = impunge_sh->sh_frame; local = frame->local; sh = &local->self_heal; - active_src = sh->active_source; + active_src = impunge_sh->active_source; child_index = (long) cookie; @@ -1115,6 +1135,7 @@ afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie, pending_array, 3 * sizeof (int32_t)); if (ret < 0) { GF_FREE (pending_array); + pending_array = NULL; gf_log (this->name, GF_LOG_WARNING, "Unable to set dict value."); } @@ -1162,8 +1183,10 @@ out: UNLOCK (&impunge_frame->lock); if (call_count == 0) { + impunge_ret_child = impunge_sh->impunge_ret_child; AFR_STACK_DESTROY (impunge_frame); - afr_sh_entry_impunge_entry_done (frame, this, active_src); + sh->impunge_done (frame, this, impunge_ret_child, -1, + op_errno); } return 0; @@ -1262,14 +1285,23 @@ int afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this, int child_index, const char *linkname) { - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - dict_t *dict = NULL; - struct iatt *buf = NULL; - int ret = 0; + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + dict_t *dict = NULL; + struct iatt *buf = NULL; + int ret = 0; + call_frame_t 
*frame = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_self_heal_t *impunge_sh = NULL; + int32_t impunge_ret_child = 0; priv = this->private; impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + frame = impunge_sh->sh_frame; + local = frame->local; + sh = &local->self_heal; buf = &impunge_local->cont.symlink.buf; @@ -1277,7 +1309,11 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this, if (!dict) { gf_log (this->name, GF_LOG_ERROR, "Out of memory"); - afr_sh_entry_impunge_entry_done (impunge_frame, this, 0); + impunge_ret_child = impunge_sh->impunge_ret_child; + AFR_STACK_DESTROY (impunge_frame); + /* impunge_frame is destroyed above: report completion on the parent self-heal frame, as every other impunge_done caller does */ + sh->impunge_done (frame, this, impunge_ret_child, -1, + ENOMEM); + goto out; } ret = afr_set_dict_gfid (dict, buf->ia_gfid); @@ -1299,7 +1335,7 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this, if (dict) dict_unref (dict); - +out: return 0; } @@ -1318,11 +1354,16 @@ afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame, call_frame_t *frame = NULL; int call_count = -1; int active_src = -1; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int32_t impunge_ret_child = 0; priv = this->private; impunge_local = impunge_frame->local; impunge_sh = &impunge_local->self_heal; frame = impunge_sh->sh_frame; + local = frame->local; + sh = &local->self_heal; active_src = impunge_sh->active_source; child_index = (long) cookie; @@ -1348,8 +1389,10 @@ out: UNLOCK (&impunge_frame->lock); if (call_count == 0) { + impunge_ret_child = impunge_sh->impunge_ret_child; AFR_STACK_DESTROY (impunge_frame); - afr_sh_entry_impunge_entry_done (frame, this, active_src); + sh->impunge_done (frame, this, impunge_ret_child, op_ret, + op_errno); } return 0; @@ -1394,11 +1437,16 @@ afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cooki call_frame_t *frame = NULL; int call_count = -1; int active_src = -1; + afr_local_t *local = NULL; + 
afr_self_heal_t *sh = NULL; + int32_t impunge_ret_child = 0; priv = this->private; impunge_local = impunge_frame->local; impunge_sh = &impunge_local->self_heal; frame = impunge_sh->sh_frame; + local = frame->local; + sh = &local->self_heal; active_src = impunge_sh->active_source; child_index = (long) cookie; @@ -1444,8 +1492,10 @@ out: UNLOCK (&impunge_frame->lock); if (call_count == 0) { + impunge_ret_child = impunge_sh->impunge_ret_child; AFR_STACK_DESTROY (impunge_frame); - afr_sh_entry_impunge_entry_done (frame, this, active_src); + sh->impunge_done (frame, this, impunge_ret_child, op_ret, + op_errno); } return 0; @@ -1489,11 +1539,16 @@ afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie, call_frame_t *frame = NULL; int call_count = -1; int active_src = -1; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int32_t impunge_ret_child = 0; priv = this->private; impunge_local = impunge_frame->local; impunge_sh = &impunge_local->self_heal; frame = impunge_sh->sh_frame; + local = frame->local; + sh = &local->self_heal; active_src = impunge_sh->active_source; child_index = (long) cookie; @@ -1520,8 +1575,10 @@ out: UNLOCK (&impunge_frame->lock); if (call_count == 0) { + impunge_ret_child = impunge_sh->impunge_ret_child; AFR_STACK_DESTROY (impunge_frame); - afr_sh_entry_impunge_entry_done (frame, this, active_src); + sh->impunge_done (frame, this, impunge_ret_child, op_ret, + op_errno); } return 0; @@ -1552,44 +1609,22 @@ afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this, return 0; } - int -afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, - dict_t *xattr,struct iatt *postparent) +afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this, + int child_index, struct iatt *buf, + struct iatt *postparent) { - afr_private_t *priv = NULL; afr_local_t *impunge_local = NULL; 
afr_self_heal_t *impunge_sh = NULL; - int active_src = 0; - int type = 0; - int child_index = 0; - call_frame_t *frame = NULL; - int call_count = 0; + afr_private_t *priv = NULL; + ia_type_t type = IA_INVAL; + int ret = 0; + int active_src = 0; - priv = this->private; impunge_local = impunge_frame->local; impunge_sh = &impunge_local->self_heal; - frame = impunge_sh->sh_frame; - - child_index = (long) cookie; - - active_src = impunge_sh->active_source; - - if (op_ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "looking up %s on %s (for %s) failed (%s)", - impunge_local->loc.path, - priv->children[active_src]->name, - priv->children[child_index]->name, - strerror (op_errno)); - goto out; - } - impunge_sh->parentbuf = *postparent; - + active_src = impunge_sh->active_source; impunge_local->cont.lookup.buf = *buf; afr_update_loc_gfids (&impunge_local->loc, buf, postparent); @@ -1617,10 +1652,58 @@ afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame, "%s has unknown file type on %s: 0%o", impunge_local->loc.path, priv->children[active_src]->name, type); - goto out; + ret = -1; break; } + return ret; +} + +int +afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame, + void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, + dict_t *xattr,struct iatt *postparent) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *impunge_sh = NULL; + afr_self_heal_t *sh = NULL; + int active_src = 0; + int child_index = 0; + call_frame_t *frame = NULL; + int call_count = 0; + int ret = 0; + int32_t impunge_ret_child = 0; + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + frame = impunge_sh->sh_frame; + local = frame->local; + sh = &local->self_heal; + + child_index = (long) cookie; + + active_src = impunge_sh->active_source; + + if (op_ret != 0) { + gf_log (this->name, GF_LOG_DEBUG, + 
"looking up %s on %s (for %s) failed (%s)", + impunge_local->loc.path, + priv->children[active_src]->name, + priv->children[child_index]->name, + strerror (op_errno)); + goto out; + } + + ret = afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf, + postparent); + if (ret) + goto out; + return 0; out: @@ -1631,8 +1714,10 @@ out: UNLOCK (&impunge_frame->lock); if (call_count == 0) { + impunge_ret_child = impunge_sh->impunge_ret_child; AFR_STACK_DESTROY (impunge_frame); - afr_sh_entry_impunge_entry_done (frame, this, active_src); + sh->impunge_done (frame, this, impunge_ret_child, op_ret, + op_errno); } return 0; @@ -1680,11 +1765,16 @@ afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie, int child_index = 0; call_frame_t *frame = NULL; int active_src = 0; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int32_t impunge_ret_child = 0; priv = this->private; impunge_local = impunge_frame->local; impunge_sh = &impunge_local->self_heal; frame = impunge_sh->sh_frame; + local = frame->local; + sh = &local->self_heal; child_index = (long) cookie; active_src = impunge_sh->active_source; @@ -1730,8 +1820,10 @@ afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie, UNLOCK (&impunge_frame->lock); if (call_count == 0) { + impunge_ret_child = impunge_sh->impunge_ret_child; AFR_STACK_DESTROY (impunge_frame); - afr_sh_entry_impunge_entry_done (frame, this, active_src); + sh->impunge_done (frame, this, impunge_ret_child, op_ret, + op_errno); } return 0; @@ -1753,12 +1845,14 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this, int i = 0; int call_count = 0; int op_errno = 0; + int op_ret = -1; priv = this->private; local = frame->local; sh = &local->self_heal; active_src = sh->active_source; + sh->impunge_done = afr_sh_entry_impunge_entry_done; if ((strcmp (entry->d_name, ".") == 0) || (strcmp (entry->d_name, "..") == 0) @@ -1768,6 +1862,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t 
*this, gf_log (this->name, GF_LOG_TRACE, "skipping inspection of %s under %s", entry->d_name, local->loc.path); + op_ret = 0; goto out; } @@ -1779,6 +1874,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this, if (!impunge_frame) { gf_log (this->name, GF_LOG_ERROR, "Out of memory."); + op_errno = ENOMEM; goto out; } @@ -1788,12 +1884,14 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this, impunge_sh = &impunge_local->self_heal; impunge_sh->sh_frame = frame; impunge_sh->active_source = active_src; + impunge_sh->impunge_ret_child = active_src; impunge_sh->impunging_entry_mode = st_mode_from_ia (entry->d_stat.ia_prot, entry->d_stat.ia_type); ret = build_child_loc (this, &impunge_local->loc, &local->loc, entry->d_name); if (ret != 0) { + op_errno = ENOMEM; goto out; } @@ -1835,7 +1933,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this, ret = 0; out: if (ret == -1) - afr_sh_entry_impunge_entry_done (frame, this, active_src); + sh->impunge_done (frame, this, active_src, op_ret, op_errno); return 0; } @@ -2225,63 +2323,6 @@ afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie, return 0; } - - -int -afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this) -{ - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - dict_t *xattr_req = NULL; - int ret = 0; - int call_count = 0; - int i = 0; - afr_self_heal_t *sh = NULL; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - call_count = afr_up_children_count (priv->child_count, - local->child_up); - - local->call_count = call_count; - - xattr_req = dict_new(); - if (xattr_req) { - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_uint64 (xattr_req, - priv->pending_key[i], - 3 * sizeof(int32_t)); - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "%s: Unable to set dict value.", - local->loc.path); - } - } - - for (i = 0; i < priv->child_count; i++) - sh->child_success[i] = -1; - sh->success_count = 0; - for (i = 0; i < 
priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, - afr_sh_entry_lookup_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->lookup, - &local->loc, xattr_req); - if (!--call_count) - break; - } - } - - if (xattr_req) - dict_unref (xattr_req); - - return 0; -} - int afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) { @@ -2302,37 +2343,13 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done " "for %s. Proceeding to FOP", local->loc.path); - afr_sh_entry_lookup(frame, this); + afr_sh_common_lookup (frame, this, &local->loc, + afr_sh_entry_lookup_cbk, _gf_false); } return 0; } -int -afr_sh_entry_lock (call_frame_t *frame, xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - - local = frame->local; - int_lock = &local->internal_lock; - - int_lock->transaction_lk_type = AFR_SELFHEAL_LK; - int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK; - - afr_set_lock_number (frame, this); - - int_lock->lk_basename = NULL; - int_lock->lk_loc = &local->loc; - int_lock->lock_cbk = afr_sh_post_nonblocking_entry_cbk; - - afr_nonblocking_entrylk (frame, this); - - - return 0; -} - - int afr_self_heal_entry (call_frame_t *frame, xlator_t *this) { @@ -2344,7 +2361,8 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this) local = frame->local; if (local->self_heal.need_entry_self_heal && priv->entry_self_heal) { - afr_sh_entry_lock (frame, this); + afr_sh_entrylk (frame, this, &local->loc, NULL, + afr_sh_post_nonblocking_entry_cbk); } else { gf_log (this->name, GF_LOG_TRACE, "proceeding to completion on %s", diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 1214eefe25b..03b91dae300 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -604,64 +604,6 @@ 
afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } - -int -afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - int call_count = 0; - dict_t *xattr_req = NULL; - int ret = 0; - afr_self_heal_t *sh = NULL; - - local = frame->local; - priv = this->private; - sh = &local->self_heal; - - call_count = afr_up_children_count (priv->child_count, - local->child_up); - local->call_count = call_count; - - xattr_req = dict_new(); - - if (xattr_req) { - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_uint64 (xattr_req, - priv->pending_key[i], - 3 * sizeof(int32_t)); - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "Unable to set dict value."); - } - } - - for (i = 0; i < priv->child_count; i++) - sh->child_success[i] = -1; - sh->success_count = 0; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - gf_log (this->name, GF_LOG_TRACE, - "looking up %s on %s", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->lookup, - &local->loc, xattr_req); - if (!--call_count) - break; - } - } - - if (xattr_req) - dict_unref (xattr_req); - - return 0; -} - int afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) @@ -683,7 +625,8 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame, gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata " "inodelks done for %s. 
Proceeding to FOP", local->loc.path); - afr_sh_metadata_lookup (frame, this); + afr_sh_common_lookup (frame, this, &local->loc, + afr_sh_metadata_lookup_cbk, _gf_false); } return 0; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 00e9a1b1efb..5b211499acc 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -50,7 +50,7 @@ afr_self_heal_find_sources (xlator_t *this, afr_local_t *local, dict_t **xattr, afr_transaction_type transaction_type); int -afr_self_heal (call_frame_t *frame, xlator_t *this); +afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode); gf_boolean_t afr_is_fresh_read_child (int32_t *sources, int32_t child_count, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 8f7f54fafe0..bc85fd71d15 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -37,6 +37,17 @@ struct _pump_private; +typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this, + int child, int32_t op_error, + int32_t op_errno); + +typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this, + int child, int32_t op_error, + int32_t op_errno); +typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this); + +typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); + typedef struct _afr_private { gf_lock_t lock; /* to guard access to child_count, etc */ unsigned int child_count; /* total number of children */ @@ -101,9 +112,12 @@ typedef struct { /* External interface: These are variables (some optional) that are set by whoever has triggered self-heal */ + inode_t *inode; gf_boolean_t need_data_self_heal; gf_boolean_t need_metadata_self_heal; gf_boolean_t need_entry_self_heal; + gf_boolean_t need_gfid_self_heal; + gf_boolean_t need_missing_entry_self_heal; gf_boolean_t forced_merge; /* Is this a self-heal triggered to forcibly merge the directories? 
*/ @@ -121,20 +135,28 @@ typedef struct { ia_type_t type; /* st_mode of the entry we're doing self-heal on */ + uuid_t sh_gfid_req; /* gfid self-heal needs to be done + with this gfid if it is not null */ /* Function to call to unwind. If self-heal is being done in the background, this function will be called as soon as possible. */ - int (*unwind) (call_frame_t *frame, xlator_t *this); + int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret, + int32_t op_errno); /* End of external interface members */ /* array of stat's, one for each child */ struct iatt *buf; + struct iatt *parentbufs; struct iatt parentbuf; struct iatt entrybuf; + afr_expunge_done_cbk_t expunge_done; + afr_impunge_done_cbk_t impunge_done; + int32_t impunge_ret_child; + /* array of xattr's, one for each child */ dict_t **xattr; @@ -142,12 +164,19 @@ typedef struct { */ int32_t *child_success; int success_count; + /* array containing the fresh children found in the self-heal process */ + int32_t *fresh_children; + /* array containing the fresh children found in the parent lookup */ + int32_t *fresh_parent_dirs; /* array of errno's, one for each child */ int *child_errno; int32_t **pending_matrix; int32_t **delta_matrix; + int32_t op_ret; + int32_t op_errno; + int *sources; int source; int active_source; @@ -165,6 +194,7 @@ typedef struct { blksize_t block_size; off_t file_size; off_t offset; + afr_post_remove_call_t post_remove_call; loc_t parent_loc; @@ -179,6 +209,7 @@ typedef struct { int (*completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this); + void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this); call_frame_t *sh_frame; } afr_self_heal_t; @@ -343,6 +374,7 @@ typedef struct _afr_local { } statfs; struct { + uuid_t gfid_req; inode_t *inode; struct iatt buf; struct iatt postparent; @@ -958,4 +990,47 @@ int32_t afr_marker_getxattr 
(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv ); +void +afr_get_fresh_children (int32_t *success_children, int32_t *sources, + int32_t *fresh_children, unsigned int child_count); +void +afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, + int32_t child_count); +void +afr_reset_children (int32_t *fresh_children, int32_t child_count); +gf_boolean_t +afr_error_more_important (int32_t old_errno, int32_t new_errno); +int +afr_errno_count (int32_t *children, int *child_errno, + unsigned int child_count, int32_t op_errno); +int +afr_get_children_count (int32_t *fresh_children, unsigned int child_count); +gf_boolean_t +afr_is_child_present (int32_t *success_children, int32_t child_count, + int32_t child); +void +afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs, + int32_t *success_children, + unsigned int child_count); +void +afr_reset_xattr (dict_t **xattr, unsigned int child_count); +gf_boolean_t +afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children, + unsigned int child_count, const char *path, + const char *xlator_name); +int +afr_gfid_missing_count (const char *xlator_name, int32_t *children, + struct iatt *bufs, unsigned int child_count, + const char *path); +void +afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path); +void +afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count); +afr_transaction_type +afr_transaction_type_get (ia_type_t ia_type); +int32_t +afr_resultant_errno_get (int32_t *children, + int *child_errno, unsigned int child_count); +int32_t* +afr_fresh_children_create (int32_t child_count); #endif /* __AFR_H__ */ -- cgit