diff options
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 0c621271405..d5002a2070b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -4293,6 +4293,17 @@ __get_heard_from_all_status (xlator_t *this) * * Passed to the qsort function to order a list of children by the latency * and/or up/down states. + * + * Note: This isn't as simple as taking the latencies and calling it a + * a day. Children can be marked down, which overrides their latency + * signal. Having a lower-latency child available doesn't guarentee this + * child shall be marked up: we don't want to constantly be swapping + * slightly better bricks for others...this is jarring to clients and + * could cause all sorts of issues. Plus, the fail-over, max-replicas + * flags must all be honored which manage the up/down state of children. + * + * In short, the (as marked) up/down down state of the brick shall always + * take precedence when sorting by latency. */ static int _afr_cmp_child (const void *child1, const void *child2) @@ -4300,6 +4311,18 @@ _afr_cmp_child (const void *child1, const void *child2) struct afr_child *child11 = (struct afr_child *)child1; struct afr_child *child22 = (struct afr_child *)child2; + /* If both children are _marked_ down they are equal */ + if (!child11->child_up && !child22->child_up) + return 0; + + /* Prefer child 2, child 1 is _marked_ down, child 2 is not */ + if (!child11->child_up && child22->child_up) + return 1; + + /* Prefer child 1, child 2 is _marked_ down, child 1 is not */ + if (child11->child_up && !child22->child_up) + return -1; + if (child11->latency > child22->latency) { return 1; } |
