diff options
-rw-r--r-- | tests/bugs/bug-860663.t | 49 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 22 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 3 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 11 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 3 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 43 |
6 files changed, 96 insertions, 35 deletions
diff --git a/tests/bugs/bug-860663.t b/tests/bugs/bug-860663.t new file mode 100644 index 00000000000..a36c8e46590 --- /dev/null +++ b/tests/bugs/bug-860663.t @@ -0,0 +1,49 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc + +cleanup; + +function file_count() +{ + val=1 + + if [ "$1" == "$2" ] + then + val=0 + fi + echo $val +} + +BRICK_COUNT=3 + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 +TEST $CLI volume start $V0 + +## Mount FUSE +TEST glusterfs -s $H0 --volfile-id $V0 $M0; +sleep 5; + +touch $M0/files{1..10000} >/dev/null; + +ORIG_FILE_COUNT=`ls -l $M0 | wc -l`; + +# Kill a brick process +kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}1.pid`; + +TEST $CLI volume rebalance $V0 fix-layout start + +sleep 30; + +touch $M0/files{1..10000} >/dev/null; + +TEST $CLI volume start $V0 force + +sleep 5; + +NEW_FILE_COUNT=`ls -l $M0 | wc -l`; + +EXPECT "0" file_count $ORIG_FILE_COUNT $NEW_FILE_COUNT diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 0925b6d2471..70e737c89c6 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2398,9 +2398,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_INFO, "fixing the layout of %s", loc->path); - dht_fix_directory_layout (frame, dht_common_setxattr_cbk, - layout); - return 0; + ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; + } + return ret; } tmp = dict_get (xattr, "distribute.directory-spread-count"); @@ -2412,10 +2416,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, (dir_spread > 0))) { layout->spread_cnt = dir_spread; - dht_fix_directory_layout (frame, - dht_common_setxattr_cbk, - layout); - return 0; + ret = dht_fix_directory_layout (frame, + dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; + } + return ret; } gf_log (this->name, GF_LOG_ERROR, "wrong 'directory-spread-count' value (%s)", value); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 1a69feba607..1f3ccc1cde0 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -140,7 +140,6 @@ struct dht_local { struct { uint32_t hole_cnt; uint32_t overlaps_cnt; - uint32_t missing; uint32_t down; uint32_t misc; dht_selfheal_dir_cbk_t dir_cbk; @@ -380,7 +379,7 @@ int dht_layout_normalize (xlator_t *this, l int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t *holes_p, uint32_t *overlaps_p, uint32_t *missing_p, uint32_t *down_p, - uint32_t *misc_p); + uint32_t *misc_p, uint32_t *no_space_p); int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, loc_t *loc, dict_t *xattr); diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 993f6166a12..da0e449fdba 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -478,7 +478,8 @@ dht_layout_sort_volname (dht_layout_t *layout) int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t *holes_p, uint32_t *overlaps_p, - uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p) + uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p, + uint32_t *no_space_p) { uint32_t overlaps = 0; uint32_t missing = 0; @@ -491,6 +492,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t prev_stop = 0; uint32_t last_stop = 0; char is_virgin = 1; + uint32_t no_space = 0; /* TODO: explain what is happening */ @@ -508,7 +510,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, down++; break; case ENOSPC: - down++; + no_space++; break; default: misc++; @@ -547,6 +549,9 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, if (misc_p) *misc_p = misc; + if (no_space_p) + *no_space_p = no_space; + return ret; } @@ -571,7 +576,7 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) ret = dht_layout_anomalies (this, loc, layout, &holes, &overlaps, - &missing, &down, &misc); + &missing, &down, &misc, NULL); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "error while finding anomalies in %s -- not good news", diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index d31f8a012c4..a2e96a1b281 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1383,6 +1383,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, "failed for %s", entry_loc.path); defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + defrag->total_failures ++; goto out; } ret = gf_defrag_fix_layout (this, defrag, &entry_loc, @@ -1391,6 +1392,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "Fix layout " "failed for %s", entry_loc.path); + defrag->total_failures++; goto out; } @@ -1481,6 +1483,7 @@ gf_defrag_start_crawl (void *data) if (ret) { gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed", loc.path); + defrag->total_failures++; goto out; } diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 7a5d4d116ad..4840034a97f 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -593,6 +593,8 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc, dht_layout_t *new_layout = NULL; dht_conf_t *priv = NULL; dht_local_t *local = NULL; + uint32_t subvol_down = 0; + int ret = 0; this = frame->this; priv = this->private; @@ -608,6 +610,17 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc, if (!new_layout) goto done; + /* If a subvolume is down, do not re-write the layout. */ + ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL, + &subvol_down, NULL, NULL); + + if (subvol_down || (ret == -1)) { + gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down" + ". Skipping fix layout.", subvol_down); + GF_FREE (new_layout); + return NULL; + } + for (i = 0; i < new_layout->cnt; i++) { if (layout->list[i].err != ENOSPC) new_layout->list[i].err = layout->list[i].err; @@ -695,35 +708,17 @@ int dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { - dht_conf_t *conf = NULL; - xlator_t *this = NULL; dht_local_t *local = NULL; - int missing = -1; - int down = -1; - int holes = -1; + uint32_t holes = 0; int ret = -1; int i = -1; - int overlaps = -1; + uint32_t overlaps = 0; - this = frame->this; - conf = this->private; local = frame->local; - missing = local->selfheal.missing; - down = local->selfheal.down; holes = local->selfheal.hole_cnt; overlaps = local->selfheal.overlaps_cnt; - if ((missing + down) == conf->subvolume_cnt) { - dht_selfheal_layout_new_directory (frame, loc, layout); - ret = 0; - } - - if (holes <= down) { - /* the down subvol might fill up the holes */ - ret = 0; - } - if (holes || overlaps) { dht_selfheal_layout_new_directory (frame, loc, layout); ret = 0; @@ -775,6 +770,9 @@ dht_fix_directory_layout (call_frame_t *frame, /* No layout sorting required here */ tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout); + if (!tmp_layout) { + return -1; + } dht_fix_dir_xattr (frame, &local->loc, tmp_layout); return 0; @@ -797,9 +795,8 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, dht_layout_anomalies (this, loc, layout, &local->selfheal.hole_cnt, &local->selfheal.overlaps_cnt, - &local->selfheal.missing, - &local->selfheal.down, - &local->selfheal.misc); + NULL, &local->selfheal.down, + &local->selfheal.misc, NULL); down = local->selfheal.down; misc = local->selfheal.misc; |