diff options
author | shishir gowda <sgowda@redhat.com> | 2012-09-26 18:09:33 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-11-29 22:11:06 -0800 |
commit | bde0e142632854b8c74a4df0427662c024dd29c1 (patch) | |
tree | 4f97880501fd288228bdda6dabe67d1dffa42cf1 /xlators/cluster/dht | |
parent | c7baba53635c78b899a165d1de6b6982bb520325 (diff) |
cluster/dht: fail fix-layout if any of the subvol is down
If any subvolume is down, and a layout is re-written and hash
values change, entry names in the downed subvol can be reused
in the other subvol which got the same hash range. when the
downed subvol is brought back up, duplicate entried might appear
Also separated handling of ENOSPC and ENOTCONN error.
Change-Id: I5ed93990425a4cee70df2dab7c7c119fdc87ad56
BUG: 860663
Signed-off-by: shishir gowda <sgowda@redhat.com>
Reviewed-on: http://review.gluster.org/4000
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 22 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 3 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 11 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 3 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 43 |
5 files changed, 47 insertions, 35 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 0925b6d2471..70e737c89c6 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2398,9 +2398,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_INFO, "fixing the layout of %s", loc->path); - dht_fix_directory_layout (frame, dht_common_setxattr_cbk, - layout); - return 0; + ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; + } + return ret; } tmp = dict_get (xattr, "distribute.directory-spread-count"); @@ -2412,10 +2416,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, (dir_spread > 0))) { layout->spread_cnt = dir_spread; - dht_fix_directory_layout (frame, - dht_common_setxattr_cbk, - layout); - return 0; + ret = dht_fix_directory_layout (frame, + dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; + } + return ret; } gf_log (this->name, GF_LOG_ERROR, "wrong 'directory-spread-count' value (%s)", value); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 1a69feba607..1f3ccc1cde0 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -140,7 +140,6 @@ struct dht_local { struct { uint32_t hole_cnt; uint32_t overlaps_cnt; - uint32_t missing; uint32_t down; uint32_t misc; dht_selfheal_dir_cbk_t dir_cbk; @@ -380,7 +379,7 @@ int dht_layout_normalize (xlator_t *this, l int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t *holes_p, uint32_t *overlaps_p, uint32_t *missing_p, uint32_t *down_p, - uint32_t *misc_p); + uint32_t *misc_p, uint32_t *no_space_p); int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, loc_t *loc, dict_t *xattr); diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 993f6166a12..da0e449fdba 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -478,7 +478,8 @@ dht_layout_sort_volname (dht_layout_t *layout) int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t *holes_p, uint32_t *overlaps_p, - uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p) + uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p, + uint32_t *no_space_p) { uint32_t overlaps = 0; uint32_t missing = 0; @@ -491,6 +492,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t prev_stop = 0; uint32_t last_stop = 0; char is_virgin = 1; + uint32_t no_space = 0; /* TODO: explain what is happening */ @@ -508,7 +510,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, down++; break; case ENOSPC: - down++; + no_space++; break; default: misc++; @@ -547,6 +549,9 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, if (misc_p) *misc_p = misc; + if (no_space_p) + *no_space_p = no_space; + return ret; } @@ -571,7 +576,7 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) ret = dht_layout_anomalies (this, loc, layout, &holes, &overlaps, - &missing, &down, &misc); + &missing, &down, &misc, NULL); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "error while finding anomalies in %s -- not good news", diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index d31f8a012c4..a2e96a1b281 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1383,6 +1383,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, "failed for %s", entry_loc.path); defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + defrag->total_failures ++; goto out; } ret = gf_defrag_fix_layout (this, defrag, &entry_loc, @@ -1391,6 +1392,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "Fix layout " "failed for %s", entry_loc.path); + defrag->total_failures++; goto out; } @@ -1481,6 +1483,7 @@ gf_defrag_start_crawl (void *data) if (ret) { gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed", loc.path); + defrag->total_failures++; goto out; } diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 7a5d4d116ad..4840034a97f 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -593,6 +593,8 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc, dht_layout_t *new_layout = NULL; dht_conf_t *priv = NULL; dht_local_t *local = NULL; + uint32_t subvol_down = 0; + int ret = 0; this = frame->this; priv = this->private; @@ -608,6 +610,17 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc, if (!new_layout) goto done; + /* If a subvolume is down, do not re-write the layout. */ + ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL, + &subvol_down, NULL, NULL); + + if (subvol_down || (ret == -1)) { + gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down" + ". Skipping fix layout.", subvol_down); + GF_FREE (new_layout); + return NULL; + } + for (i = 0; i < new_layout->cnt; i++) { if (layout->list[i].err != ENOSPC) new_layout->list[i].err = layout->list[i].err; @@ -695,35 +708,17 @@ int dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { - dht_conf_t *conf = NULL; - xlator_t *this = NULL; dht_local_t *local = NULL; - int missing = -1; - int down = -1; - int holes = -1; + uint32_t holes = 0; int ret = -1; int i = -1; - int overlaps = -1; + uint32_t overlaps = 0; - this = frame->this; - conf = this->private; local = frame->local; - missing = local->selfheal.missing; - down = local->selfheal.down; holes = local->selfheal.hole_cnt; overlaps = local->selfheal.overlaps_cnt; - if ((missing + down) == conf->subvolume_cnt) { - dht_selfheal_layout_new_directory (frame, loc, layout); - ret = 0; - } - - if (holes <= down) { - /* the down subvol might fill up the holes */ - ret = 0; - } - if (holes || overlaps) { dht_selfheal_layout_new_directory (frame, loc, layout); ret = 0; @@ -775,6 +770,9 @@ dht_fix_directory_layout (call_frame_t *frame, /* No layout sorting required here */ tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout); + if (!tmp_layout) { + return -1; + } dht_fix_dir_xattr (frame, &local->loc, tmp_layout); return 0; @@ -797,9 +795,8 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, dht_layout_anomalies (this, loc, layout, &local->selfheal.hole_cnt, &local->selfheal.overlaps_cnt, - &local->selfheal.missing, - &local->selfheal.down, - &local->selfheal.misc); + NULL, &local->selfheal.down, + &local->selfheal.misc, NULL); down = local->selfheal.down; misc = local->selfheal.misc; |