diff options
-rwxr-xr-x | tests/bugs/bug-853258.t | 45 | ||||
-rwxr-xr-x | tests/bugs/overlap.py | 59 | ||||
-rw-r--r-- | tests/volume.rc | 9 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 16 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 82 |
5 files changed, 187 insertions, 24 deletions
diff --git a/tests/bugs/bug-853258.t b/tests/bugs/bug-853258.t new file mode 100755 index 000000000..c702e6f30 --- /dev/null +++ b/tests/bugs/bug-853258.t @@ -0,0 +1,45 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +mkdir -p $H0:$B0/${V0}0 +mkdir -p $H0:$B0/${V0}1 +mkdir -p $H0:$B0/${V0}2 +mkdir -p $H0:$B0/${V0}3 + +# Create and start a volume. +TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 +TEST $CLI volume start $V0 +EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status'; + +# Force assignment of initial ranges. +TEST $CLI volume rebalance $V0 fix-layout start +EXPECT_WITHIN 15 "success:" rebalance_status_field $V0 + +# Get the original values. +xattrs="" +for i in $(seq 0 2); do + xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)" +done + +# Expand the volume and force assignment of new ranges. +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 +# Force assignment of initial ranges. +TEST $CLI volume rebalance $V0 fix-layout start +EXPECT_WITHIN 15 "success:" rebalance_status_field $V0 + +for i in $(seq 0 3); do + xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)" +done + +overlap=$($(dirname $0)/overlap.py $xattrs) +# 2863311531 = 0xaaaaaaab = 2/3 overlap +TEST [ "$overlap" -ge 2863311531 ] + +cleanup diff --git a/tests/bugs/overlap.py b/tests/bugs/overlap.py new file mode 100755 index 000000000..15f2da473 --- /dev/null +++ b/tests/bugs/overlap.py @@ -0,0 +1,59 @@ +#!/usr/bin/python + +import sys + +def calculate_one (ov, nv): + old_start = int(ov[18:26],16) + old_end = int(ov[26:34],16) + new_start = int(nv[18:26],16) + new_end = int(nv[26:34],16) + if (new_end < old_start) or (new_start > old_end): + #print '%s, %s -> ZERO' % (ov, nv) + return 0 + all_start = max(old_start,new_start) + all_end = min(old_end,new_end) + #print '%s, %s -> %08x' % (ov, nv, all_end - all_start + 1) + return all_end - all_start + 1 + +def calculate_all (values): + total = 0 + nv_index = len(values) / 2 + for old_val in values[:nv_index]: + new_val = values[nv_index] + nv_index += 1 + total += calculate_one(old_val,new_val) + return total + +""" +test1_vals = [ + '0x0000000000000000000000003fffffff', # first quarter + '0x0000000000000000400000007fffffff', # second quarter + '0x000000000000000080000000ffffffff', # second half + '0x00000000000000000000000055555554', # first third + '0x000000000000000055555555aaaaaaa9', # second third + '0x0000000000000000aaaaaaaaffffffff', # last third +] + +test2_vals = [ + '0x0000000000000000000000003fffffff', # first quarter + '0x0000000000000000400000007fffffff', # second quarter + '0x000000000000000080000000ffffffff', # second half + '0x00000000000000000000000055555554', # first third + # Next two are (incorrectly) swapped. + '0x0000000000000000aaaaaaaaffffffff', # last third + '0x000000000000000055555555aaaaaaa9', # second third +] + +print '%08x' % calculate_one(test1_vals[0],test1_vals[3]) +print '%08x' % calculate_one(test1_vals[1],test1_vals[4]) +print '%08x' % calculate_one(test1_vals[2],test1_vals[5]) +print '= %08x' % calculate_all(test1_vals) +print '%08x' % calculate_one(test2_vals[0],test2_vals[3]) +print '%08x' % calculate_one(test2_vals[1],test2_vals[4]) +print '%08x' % calculate_one(test2_vals[2],test2_vals[5]) +print '= %08x' % calculate_all(test2_vals) +""" + +if __name__ == '__main__': + # Return decimal so bash can reason about it. + print '%d' % calculate_all(sys.argv[1:]) diff --git a/tests/volume.rc b/tests/volume.rc index 9daf560b7..2a6ada3e6 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -26,8 +26,8 @@ function volume_option() $CLI volume info $vol | egrep "^$key: " | cut -f2 -d' '; } -function rebalance_status_completed_field { - $CLI volume rebalance $V0 status | awk '{print $6}' | sed -n 3p +function rebalance_status_field { + $CLI volume rebalance $1 status | sed -n '$p' | cut -d' ' -f4 } function remove_brick_status_completed_field { @@ -194,3 +194,8 @@ function gd_is_replace_brick_completed { echo "N" fi } + +function dht_get_layout { + local my_xa=trusted.glusterfs.dht + getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2 +} diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 8cae52653..34a7475bd 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -412,6 +412,22 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j) layout->list[j].err = err_swap; } +void +dht_layout_range_swap (dht_layout_t *layout, int i, int j) +{ + uint32_t start_swap = 0; + uint32_t stop_swap = 0; + + start_swap = layout->list[i].start; + stop_swap = layout->list[i].stop; + + layout->list[i].start = layout->list[j].start; + layout->list[i].stop = layout->list[j].stop; + + layout->list[j].start = start_swap; + layout->list[j].stop = stop_swap; +} + int64_t dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j) { diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 8463abdbf..77afde82e 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -38,12 +38,20 @@ dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n) if (old->list[o].err > 0 || new->list[n].err > 0) return 0; + if (old->list[o].start == old->list[o].stop) { + return 0; + } + + if (new->list[n].start == new->list[n].stop) { + return 0; + } + if ((old->list[o].start > new->list[n].stop) || (old->list[o].stop < new->list[n].start)) return 0; - return max (old->list[o].start, new->list[n].start) - - min (old->list[o].stop, new->list[n].stop); + return min (old->list[o].stop, new->list[n].stop) - + max (old->list[o].start, new->list[n].start) + 1; } @@ -564,18 +572,25 @@ void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc, dht_layout_t *new_layout); void dht_layout_entry_swap (dht_layout_t *layout, int i, int j); +void dht_layout_range_swap (dht_layout_t *layout, int i, int j); + +/* + * It's a bit icky using local variables in a macro, but it makes the rest + * of the code a lot clearer. + */ +#define OV_ENTRY(x,y) table[x*new->cnt+y] void dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc, dht_layout_t *new, dht_layout_t *old) { int i = 0; - int k = 0; + int j = 0; uint32_t curr_overlap = 0; uint32_t max_overlap = 0; int max_overlap_idx = -1; uint32_t overlap = 0; - + uint32_t *table = NULL; dht_layout_sort_volname (old); /* Now both old_layout->list[] and new_layout->list[] @@ -584,31 +599,54 @@ dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc, to the same subvolumes */ + /* Build a table of overlaps between new[i] and old[j]. */ + table = alloca(sizeof(overlap)*old->cnt*new->cnt); + if (!table) { + return; + } + memset(table,0,sizeof(overlap)*new->cnt*new->cnt); + for (i = 0; i < new->cnt; ++i) { + for (j = 0; j < old->cnt; ++j) { + OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i); + } + } + for (i = 0; i < new->cnt; i++) { - if (new->list[i].err > 0) + if (new->list[i].err > 0) { /* Subvol might be marked for decommission with EINVAL, or some other serious error marked with positive errno. */ continue; + } - curr_overlap = dht_overlap_calc (old, i, new, i); - max_overlap = curr_overlap; - max_overlap_idx = i; - - /* Subvols up to `i' have already found their best - matches. Search only from the rest. - */ - for (k = (i + 1); k < new->cnt; k++) { - overlap = dht_overlap_calc (old, i, new, k); - if (overlap > max_overlap) { - max_overlap_idx = k; - max_overlap = overlap; - } - } - - if (max_overlap > curr_overlap) - dht_layout_entry_swap (new, i, max_overlap_idx); + max_overlap = 0; + max_overlap_idx = i; + for (j = (i + 1); j < new->cnt; ++j) { + /* Calculate the overlap now. */ + curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j); + /* Calculate the overlap after the proposed swap. */ + overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i); + /* Are we better than status quo? */ + if (overlap > curr_overlap) { + overlap -= curr_overlap; + /* Are we better than the previous choice? */ + if (overlap > max_overlap) { + max_overlap = overlap; + max_overlap_idx = j; + } + } + } + + if (max_overlap_idx != i) { + dht_layout_range_swap (new, i, max_overlap_idx); + /* Need to swap the table values too. */ + for (j = 0; j < old->cnt; ++j) { + overlap = OV_ENTRY(i,j); + OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j); + OV_ENTRY(max_overlap_idx,j) = overlap; + } + } } } |