From 83937d1666f1f5e395afc7b2df477e9aad4c2278 Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Wed, 31 Jul 2013 14:45:03 +0530 Subject: Revert "cluster/dht: Return success in dht_discover if layout issues" This reverts commit a3e593f9f17cb1e68db97bb5a0d8074793a33964 which was bought into fix dht_layout_anomalies error handling. We still see applications error'ing out due to graph switches. To fix the above issue - We cannot heal in dht_discover, as it is a gfid based lookup, and not path based. So, returning error here would lead to app's to see failure. Also, update the layout in inode_ctx even if it has anomalies. Let subsequent heals fix the issue. Conflicts: xlators/cluster/dht/src/dht-common.c Signed-off-by: shishir gowda Change-Id: I68c1056c3587e04a02344548546ddd06034489c5 BUG: 960348 Reviewed-on: http://review.gluster.org/5443 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/dht/src/dht-common.c | 52 +++++++++++------------------------- xlators/cluster/dht/src/dht-common.h | 32 +++++++++------------- xlators/cluster/dht/src/dht-layout.c | 21 ++++++--------- 3 files changed, 35 insertions(+), 70 deletions(-) diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index b1fee655b..57f75c9aa 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -154,7 +154,6 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) int ret = -1; dht_layout_t *layout = NULL; dht_conf_t *conf = NULL; - uint32_t missing = 0; local = discover_frame->local; layout = local->layout; @@ -191,33 +190,20 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) goto out; } } else { - ret = dht_layout_normalize (this, &local->loc, layout, - &missing); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "normalizing failed on %s (internal error)", - local->loc.path); - op_errno = EIO; - goto out; - } - if (missing == conf->subvolume_cnt) { + ret = dht_layout_normalize (this, &local->loc, layout); + if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) { + /* either the layout is incorrect or the directory is + * not found even in one subvolume. + */ gf_log (this->name, GF_LOG_DEBUG, - "normalizing failed on %s, ENOENT errors: %u)", - local->loc.path, missing); - op_errno = ENOENT; - goto out; - } - if (ret != 0) { - gf_log (this->name, GF_LOG_WARNING, "normalizing failed on %s " - "(overlaps/holes present)", local->loc.path); - /* We may need to do the lookup again */ - /* in discover call, parent is not know, and basename - * of entry is also not available. Without which we - * cannot build a layout correctly to heal it. Hence - * returning ESTALE */ - op_errno = ESTALE; - goto out; + "(overlaps/holes present: %s, " + "ENOENT errors: %d)", local->loc.path, + (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0); + if ((ret > 0) && (ret == conf->subvolume_cnt)) { + op_errno = ESTALE; + goto out; + } } if (local->inode) @@ -421,7 +407,6 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_layout_t *layout = NULL; int ret = -1; int is_dir = 0; - uint32_t missing = 0; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -502,16 +487,9 @@ unlock: } if (local->op_ret == 0) { - ret = dht_layout_normalize (this, &local->loc, layout, - &missing); - - /* - * Arguably, we shouldn't do self-heal just because - * bricks are missing as long as there are no other - * anomalies. For now, though, just preserve the - * existing behavior. - */ - if ((ret != 0) || (missing != 0)) { + ret = dht_layout_normalize (this, &local->loc, layout); + + if (ret != 0) { gf_log (this->name, GF_LOG_DEBUG, "fixing assignment on %s", local->loc.path); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index bdaa8d64d..da8923e9b 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -400,26 +400,18 @@ typedef enum { } while (0) #define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) - -dht_layout_t *dht_layout_new (xlator_t *this, int cnt); -dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); -dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); -xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, - const char *name); -int dht_layout_normalize (xlator_t *this, loc_t *loc, - dht_layout_t *layout, - uint32_t *missing_p); -int dht_layout_anomalies (xlator_t *this, loc_t *loc, - dht_layout_t *layout, - uint32_t *holes_p, - uint32_t *overlaps_p, - uint32_t *missing_p, - uint32_t *down_p, - uint32_t *misc_p, - uint32_t *no_space_p); -int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, - xlator_t *subvol, loc_t *loc, - dict_t *xattr); +dht_layout_t *dht_layout_new (xlator_t *this, int cnt); +dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); +dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, + const char *name); +int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); +int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *holes_p, uint32_t *overlaps_p, + uint32_t *missing_p, uint32_t *down_p, + uint32_t *misc_p, uint32_t *no_space_p); +int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, + xlator_t *subvol, loc_t *loc, dict_t *xattr); xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *buf, dict_t *xattr); diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 23a6cb0ae..38e9970a7 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -601,8 +601,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, int -dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout, - uint32_t *missing_p) +dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) { int ret = 0; int i = 0; @@ -614,7 +613,6 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout, ret = dht_layout_sort (layout); if (ret == -1) { - /* defensive coding; this can't happen currently */ gf_log (this->name, GF_LOG_WARNING, "sort failed?! how the ...."); goto out; @@ -624,26 +622,23 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout, &holes, &overlaps, &missing, &down, &misc, NULL); if (ret == -1) { - /* defensive coding; this can't happen currently */ gf_log (this->name, GF_LOG_WARNING, "error while finding anomalies in %s -- not good news", loc->path); goto out; } - ret = holes + overlaps; - if (ret) { + if (holes || overlaps) { if (missing == layout->cnt) { gf_log (this->name, GF_LOG_DEBUG, "directory %s looked up first time", loc->path); } else { gf_log (this->name, GF_LOG_INFO, - "found anomalies in %s. holes=%d overlaps=%d" - " missing=%d down=%d misc=%d", - loc->path, holes, overlaps, missing, down, - misc); + "found anomalies in %s. holes=%d overlaps=%d", + loc->path, holes, overlaps); } + ret = -1; } for (i = 0; i < layout->cnt; i++) { @@ -658,14 +653,14 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout, (layout->list[i].xlator ? layout->list[i].xlator->name : "<>")); + if ((layout->list[i].err == ENOENT) && (ret >= 0)) { + ret++; + } } } out: - if (missing_p) { - *missing_p = missing; - } return ret; } -- cgit