Diffstat (limited to 'xlators/cluster/ec/src/ec.c')
-rw-r--r--  xlators/cluster/ec/src/ec.c  101
1 file changed, 60 insertions(+), 41 deletions(-)
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 59b4aeefe69..f25760049c3 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -322,7 +322,7 @@ ec_get_event_from_state (ec_t *ec)
/* If ec is up but some subvolumes are yet to notify, give
* grace time for other subvols to notify to prevent start of
* I/O which may result in self-heals */
- if (ec->timer && ec->xl_notify_count < ec->nodes)
+ if (ec->xl_notify_count < ec->nodes)
return GF_EVENT_MAXVAL;
return GF_EVENT_CHILD_UP;
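
The hunk above only drops the ec->timer check, so the full decision is not visible here. As a rough orientation, the following standalone sketch (not the GlusterFS source; struct, function and field names are simplified stand-ins, and the fragments threshold is an assumption about the surrounding code) shows the kind of decision ec_get_event_from_state() makes after this change: while any subvolume has still not notified, MAXVAL is returned so I/O, and the self-heals it could trigger, is held back.

#include <stdint.h>
#include <stdio.h>

enum { EVT_MAXVAL, EVT_CHILD_UP, EVT_CHILD_DOWN };

struct ec_state {
    int       nodes;            /* total number of subvolumes        */
    int       fragments;        /* bricks needed for I/O             */
    int       xl_notify_count;  /* subvolumes that have notified     */
    int       xl_up_count;      /* subvolumes currently UP           */
};

static int
event_from_state(struct ec_state *ec)
{
    if (ec->xl_up_count >= ec->fragments) {
        /* Enough bricks for I/O, but wait for every subvolume to
         * notify (the grace period) before declaring CHILD_UP. */
        if (ec->xl_notify_count < ec->nodes)
            return EVT_MAXVAL;
        return EVT_CHILD_UP;
    }
    return EVT_CHILD_DOWN;
}

int
main(void)
{
    struct ec_state ec = { .nodes = 6, .fragments = 4,
                           .xl_notify_count = 4, .xl_up_count = 4 };
    printf("event=%d (0 means keep waiting)\n", event_from_state(&ec));
    return 0;
}
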
@@ -344,8 +344,8 @@ ec_up (xlator_t *this, ec_t *ec)
}
ec->up = 1;
- gf_msg (this->name, GF_LOG_INFO, 0,
- EC_MSG_EC_UP, "Going UP");
+ gf_msg (this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+
gf_event (EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -358,8 +358,8 @@ ec_down (xlator_t *this, ec_t *ec)
}
ec->up = 0;
- gf_msg (this->name, GF_LOG_INFO, 0,
- EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg (this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+
gf_event (EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
@@ -383,31 +383,38 @@ ec_notify_cbk (void *data)
gf_timer_call_cancel (ec->xl->ctx, ec->timer);
ec->timer = NULL;
+ /* The timeout has expired, so any subvolume that has not
+ * already reported its state will be considered to be down.
+ * We mark the state as if all bricks had reported. */
+ ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
+ ec->xl_notify_count = ec->nodes;
+
+ /* Since we have marked all subvolumes as notified, it's
+ * guaranteed that ec_get_event_from_state() will return
+ * CHILD_UP or CHILD_DOWN, but not MAXVAL. */
event = ec_get_event_from_state (ec);
- /* If event is still MAXVAL then enough subvolumes didn't
- * notify, treat it as CHILD_DOWN. */
- if (event == GF_EVENT_MAXVAL) {
- event = GF_EVENT_CHILD_DOWN;
- ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
- ec->xl_notify_count = ec->nodes;
- } else if (event == GF_EVENT_CHILD_UP) {
- /* Rest of the bricks are still not coming up,
- * notify that ec is up. Files/directories will be
- * healed as in when they come up. */
+ if (event == GF_EVENT_CHILD_UP) {
+ /* We are ready to bring the volume up. If there are
+ * still bricks DOWN, they will be healed when they
+ * come up. */
ec_up (ec->xl, ec);
}
- /* CHILD_DOWN should not come here as no grace period is given
- * for notifying CHILD_DOWN. */
-
propagate = _gf_true;
}
unlock:
UNLOCK(&ec->lock);
if (propagate) {
+ if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
+ /* We have just brought the volume UP, so we trigger
+ * a self-heal check on the root directory. */
+ ec_launch_replace_heal (ec);
+ }
+
default_notify (ec->xl, event, NULL);
}
+
}
void
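
The bitmask arithmetic used when the grace timer expires is compact; the minimal standalone example below (illustrative only, not GlusterFS code) shows how (1ULL << nodes) - 1ULL sets one bit per subvolume, so every brick counts as "notified" even if it never sent an event.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    int nodes = 6;                               /* e.g. a 4+2 volume     */
    uint64_t xl_notify = (1ULL << nodes) - 1ULL; /* 0b111111 = 0x3f       */
    int xl_notify_count = nodes;                 /* treat all as notified */

    printf("xl_notify=0x%" PRIx64 " count=%d\n", xl_notify, xl_notify_count);
    return 0;
}
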
@@ -442,7 +449,7 @@ ec_pending_fops_completed(ec_t *ec)
}
}
-static void
+static gf_boolean_t
ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
{
uintptr_t current_state = 0;
@@ -455,7 +462,11 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
if (current_state != new_state) {
ec->xl_up ^= index_mask;
ec->xl_up_count += (current_state ? -1 : 1);
+
+ return _gf_true;
}
+
+ return _gf_false;
}
static gf_boolean_t
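
The change above turns ec_set_up_state() into a function that reports whether anything changed. The standalone sketch below (simplified types and names, not the GlusterFS code itself) mirrors that contract: the bit for a subvolume is flipped only when its state really changes, and the boolean return value is what the notify path later uses to decide on a self-heal check.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct up_state {
    uintptr_t xl_up;       /* one bit per subvolume */
    int       xl_up_count; /* number of bits set    */
};

static bool
set_up_state(struct up_state *s, uintptr_t index_mask, uintptr_t new_state)
{
    uintptr_t current_state = s->xl_up & index_mask;

    if (current_state != new_state) {
        s->xl_up ^= index_mask;                      /* toggle the bit   */
        s->xl_up_count += (current_state ? -1 : 1);  /* keep count right */
        return true;                                 /* state changed    */
    }
    return false;                                    /* no change        */
}

int
main(void)
{
    struct up_state s = { 0, 0 };

    printf("first UP changed=%d\n", set_up_state(&s, 1ULL << 2, 1ULL << 2));
    printf("repeat UP changed=%d\n", set_up_state(&s, 1ULL << 2, 1ULL << 2));
    return 0;
}
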
@@ -498,15 +509,16 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
int32_t
ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
{
- ec_t *ec = this->private;
- int32_t idx = 0;
- int32_t error = 0;
- glusterfs_event_t old_event = GF_EVENT_MAXVAL;
- dict_t *input = NULL;
- dict_t *output = NULL;
- gf_boolean_t propagate = _gf_true;
- int32_t orig_event = event;
- uintptr_t mask = 0;
+ ec_t *ec = this->private;
+ int32_t idx = 0;
+ int32_t error = 0;
+ glusterfs_event_t old_event = GF_EVENT_MAXVAL;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t propagate = _gf_true;
+ gf_boolean_t needs_shd_check = _gf_false;
+ int32_t orig_event = event;
+ uintptr_t mask = 0;
gf_msg_trace (this->name, 0, "NOTIFY(%d): %p, %p",
event, data, data2);
@@ -529,8 +541,6 @@ ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
for (idx = 0; idx < ec->nodes; idx++) {
if (ec->xl_list[idx] == data) {
- if (event == GF_EVENT_CHILD_UP)
- ec_selfheal_childup (ec, idx);
break;
}
}
@@ -556,17 +566,27 @@ ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
mask = 1ULL << idx;
if (event == GF_EVENT_CHILD_UP) {
- ec_set_up_state(ec, mask, mask);
+ /* We need to trigger a self-heal if a brick changes
+ * to UP state. */
+ needs_shd_check = ec_set_up_state(ec, mask, mask);
} else if (event == GF_EVENT_CHILD_DOWN) {
- ec_set_up_state(ec, mask, 0);
+ ec_set_up_state(ec, mask, 0);
}
event = ec_get_event_from_state (ec);
- if (event == GF_EVENT_CHILD_UP && !ec->up) {
- ec_up (this, ec);
- } else if (event == GF_EVENT_CHILD_DOWN && ec->up) {
- ec_down (this, ec);
+ if (event == GF_EVENT_CHILD_UP) {
+ if (!ec->up) {
+ ec_up (this, ec);
+ }
+ } else {
+ /* If the volume is not UP, it's irrelevant if one
+ * brick has come up. We cannot heal anything. */
+ needs_shd_check = _gf_false;
+
+ if ((event == GF_EVENT_CHILD_DOWN) && ec->up) {
+ ec_down (this, ec);
+ }
}
if (event != GF_EVENT_MAXVAL) {
@@ -585,14 +605,13 @@ unlock:
done:
if (propagate) {
+ if (needs_shd_check && ec->shd.iamshd) {
+ ec_launch_replace_heal (ec);
+ }
+
error = default_notify (this, event, data);
}
- if (ec->shd.iamshd &&
- ec->xl_notify_count == ec->nodes &&
- event == GF_EVENT_CHILD_UP) {
- ec_launch_replace_heal (ec);
- }
out:
return error;
}
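
Taken together, the ec_notify() hunks replace the old "launch a heal whenever all bricks have notified and the event is CHILD_UP" rule with a per-transition decision. The condensed sketch below (plain C stand-in with hypothetical names, not the GlusterFS code) captures that decision: a self-heal check is launched only when a brick actually transitioned to UP, the volume as a whole is UP afterwards, and the process is the self-heal daemon.

#include <stdbool.h>
#include <stdio.h>

static bool
should_launch_heal(bool brick_became_up, bool volume_up, bool iamshd)
{
    bool needs_shd_check = brick_became_up;

    if (!volume_up) {
        /* If the volume is not UP, one brick coming up cannot be
         * healed against anything, so the check is dropped. */
        needs_shd_check = false;
    }
    return needs_shd_check && iamshd;
}

int
main(void)
{
    printf("%d\n", should_launch_heal(true, true, true));   /* 1: heal */
    printf("%d\n", should_launch_heal(true, false, true));  /* 0: skip */
    printf("%d\n", should_launch_heal(false, true, true));  /* 0: skip */
    return 0;
}
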