summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xtests/bugs/bug-853258.t45
-rwxr-xr-xtests/bugs/overlap.py59
-rw-r--r--tests/volume.rc9
-rw-r--r--xlators/cluster/dht/src/dht-layout.c16
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c82
5 files changed, 187 insertions, 24 deletions
diff --git a/tests/bugs/bug-853258.t b/tests/bugs/bug-853258.t
new file mode 100755
index 000000000..c702e6f30
--- /dev/null
+++ b/tests/bugs/bug-853258.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+mkdir -p $H0:$B0/${V0}1
+mkdir -p $H0:$B0/${V0}2
+mkdir -p $H0:$B0/${V0}3
+
+# Create and start a volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status';
+
+# Force assignment of initial ranges.
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN 15 "success:" rebalance_status_field $V0
+
+# Get the original values.
+xattrs=""
+for i in $(seq 0 2); do
+ xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)"
+done
+
+# Expand the volume and force assignment of new ranges.
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+# Force assignment of initial ranges.
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN 15 "success:" rebalance_status_field $V0
+
+for i in $(seq 0 3); do
+ xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)"
+done
+
+overlap=$($(dirname $0)/overlap.py $xattrs)
+# 2863311531 = 0xaaaaaaab = 2/3 overlap
+TEST [ "$overlap" -ge 2863311531 ]
+
+cleanup
diff --git a/tests/bugs/overlap.py b/tests/bugs/overlap.py
new file mode 100755
index 000000000..15f2da473
--- /dev/null
+++ b/tests/bugs/overlap.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+
+import sys
+
+def calculate_one (ov, nv):
+ old_start = int(ov[18:26],16)
+ old_end = int(ov[26:34],16)
+ new_start = int(nv[18:26],16)
+ new_end = int(nv[26:34],16)
+ if (new_end < old_start) or (new_start > old_end):
+ #print '%s, %s -> ZERO' % (ov, nv)
+ return 0
+ all_start = max(old_start,new_start)
+ all_end = min(old_end,new_end)
+ #print '%s, %s -> %08x' % (ov, nv, all_end - all_start + 1)
+ return all_end - all_start + 1
+
+def calculate_all (values):
+ total = 0
+ nv_index = len(values) / 2
+ for old_val in values[:nv_index]:
+ new_val = values[nv_index]
+ nv_index += 1
+ total += calculate_one(old_val,new_val)
+ return total
+
+"""
+test1_vals = [
+ '0x0000000000000000000000003fffffff', # first quarter
+ '0x0000000000000000400000007fffffff', # second quarter
+ '0x000000000000000080000000ffffffff', # second half
+ '0x00000000000000000000000055555554', # first third
+ '0x000000000000000055555555aaaaaaa9', # second third
+ '0x0000000000000000aaaaaaaaffffffff', # last third
+]
+
+test2_vals = [
+ '0x0000000000000000000000003fffffff', # first quarter
+ '0x0000000000000000400000007fffffff', # second quarter
+ '0x000000000000000080000000ffffffff', # second half
+ '0x00000000000000000000000055555554', # first third
+ # Next two are (incorrectly) swapped.
+ '0x0000000000000000aaaaaaaaffffffff', # last third
+ '0x000000000000000055555555aaaaaaa9', # second third
+]
+
+print '%08x' % calculate_one(test1_vals[0],test1_vals[3])
+print '%08x' % calculate_one(test1_vals[1],test1_vals[4])
+print '%08x' % calculate_one(test1_vals[2],test1_vals[5])
+print '= %08x' % calculate_all(test1_vals)
+print '%08x' % calculate_one(test2_vals[0],test2_vals[3])
+print '%08x' % calculate_one(test2_vals[1],test2_vals[4])
+print '%08x' % calculate_one(test2_vals[2],test2_vals[5])
+print '= %08x' % calculate_all(test2_vals)
+"""
+
+if __name__ == '__main__':
+ # Return decimal so bash can reason about it.
+ print '%d' % calculate_all(sys.argv[1:])
diff --git a/tests/volume.rc b/tests/volume.rc
index 9daf560b7..2a6ada3e6 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -26,8 +26,8 @@ function volume_option()
$CLI volume info $vol | egrep "^$key: " | cut -f2 -d' ';
}
-function rebalance_status_completed_field {
- $CLI volume rebalance $V0 status | awk '{print $6}' | sed -n 3p
+function rebalance_status_field {
+ $CLI volume rebalance $1 status | sed -n '$p' | cut -d' ' -f4
}
function remove_brick_status_completed_field {
@@ -194,3 +194,8 @@ function gd_is_replace_brick_completed {
echo "N"
fi
}
+
+function dht_get_layout {
+ local my_xa=trusted.glusterfs.dht
+ getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2
+}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 8cae52653..34a7475bd 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -412,6 +412,22 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
layout->list[j].err = err_swap;
}
+void
+dht_layout_range_swap (dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+}
+
int64_t
dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
{
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 8463abdbf..77afde82e 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -38,12 +38,20 @@ dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
if (old->list[o].err > 0 || new->list[n].err > 0)
return 0;
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
+ }
+
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
+ }
+
if ((old->list[o].start > new->list[n].stop) ||
(old->list[o].stop < new->list[n].start))
return 0;
- return max (old->list[o].start, new->list[n].start) -
- min (old->list[o].stop, new->list[n].stop);
+ return min (old->list[o].stop, new->list[n].stop) -
+ max (old->list[o].start, new->list[n].start) + 1;
}
@@ -564,18 +572,25 @@ void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout);
void dht_layout_entry_swap (dht_layout_t *layout, int i, int j);
+void dht_layout_range_swap (dht_layout_t *layout, int i, int j);
+
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ */
+#define OV_ENTRY(x,y) table[x*new->cnt+y]
void
dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
dht_layout_t *new, dht_layout_t *old)
{
int i = 0;
- int k = 0;
+ int j = 0;
uint32_t curr_overlap = 0;
uint32_t max_overlap = 0;
int max_overlap_idx = -1;
uint32_t overlap = 0;
-
+ uint32_t *table = NULL;
dht_layout_sort_volname (old);
/* Now both old_layout->list[] and new_layout->list[]
@@ -584,31 +599,54 @@ dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
to the same subvolumes
*/
+ /* Build a table of overlaps between new[i] and old[j]. */
+ table = alloca(sizeof(overlap)*old->cnt*new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table,0,sizeof(overlap)*new->cnt*new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i);
+ }
+ }
+
for (i = 0; i < new->cnt; i++) {
- if (new->list[i].err > 0)
+ if (new->list[i].err > 0) {
/* Subvol might be marked for decommission
with EINVAL, or some other serious error
marked with positive errno.
*/
continue;
+ }
- curr_overlap = dht_overlap_calc (old, i, new, i);
- max_overlap = curr_overlap;
- max_overlap_idx = i;
-
- /* Subvols up to `i' have already found their best
- matches. Search only from the rest.
- */
- for (k = (i + 1); k < new->cnt; k++) {
- overlap = dht_overlap_calc (old, i, new, k);
- if (overlap > max_overlap) {
- max_overlap_idx = k;
- max_overlap = overlap;
- }
- }
-
- if (max_overlap > curr_overlap)
- dht_layout_entry_swap (new, i, max_overlap_idx);
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
+ }
+ }
+ }
+
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap (new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i,j);
+ OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j);
+ OV_ENTRY(max_overlap_idx,j) = overlap;
+ }
+ }
}
}