cluster/ec: Wait for all bricks to notify before notifying parent

This is to prevent spurious heals that can result in self-heal. Change-Id: I0b27c1c1fc7a58e2683cb1ca135117a85efcc6c9 BUG: 1179180 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/9523 Reviewed-by: Xavier Hernandez <xhernandez@datalab.es> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
author: Pranith Kumar K <pkarampu@redhat.com> 2015-02-01 15:03:46 +0530
committer: Vijay Bellur <vbellur@redhat.com> 2015-02-02 12:22:02 -0800
commit: da1ff66255017501f54c50b3c40eeea11b5fc38f (patch)
tree: e3389286e0d00a0cf7d80514236314358ec6e48a /xlators/cluster
parent: 3622146ac06918069a5994ac0dfa02aa98de4d8f (diff)
1 files changed, 34 insertions, 14 deletions
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 76911de1461..219494b961e 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -198,14 +198,21 @@ failed:
 glusterfs_event_t
 ec_get_event_from_state (ec_t *ec)
 {
-        int     down_count = 0;
+        int               down_count = 0;
 
-        if (ec->xl_up_count >= ec->fragments)
-                return GF_EVENT_CHILD_UP;
+        if (ec->xl_up_count >= ec->fragments) {
+                /* If ec is up but some subvolumes are yet to notify, give
+                 * grace time for other subvols to notify to prevent start of
+                 * I/O which may result in self-heals */
+                if (ec->timer && ec->xl_notify_count < ec->nodes)
+                        return GF_EVENT_MAXVAL;
 
-        down_count = ec->xl_notify_count - ec->xl_up_count;
-        if (down_count > ec->redundancy)
-                return GF_EVENT_CHILD_DOWN;
+                return GF_EVENT_CHILD_UP;
+        } else {
+                down_count = ec->xl_notify_count - ec->xl_up_count;
+                if (down_count > ec->redundancy)
+                        return GF_EVENT_CHILD_DOWN;
+        }
 
         return GF_EVENT_MAXVAL;
 }
@@ -235,9 +242,10 @@ ec_down (xlator_t *this, ec_t *ec)
 }
 
 void
-ec_notify_down (void *data)
+ec_notify_cbk (void *data)
 {
         ec_t *ec = data;
+        glusterfs_event_t event = GF_EVENT_MAXVAL;
 
         LOCK(&ec->lock);
         {
@@ -252,26 +260,38 @@ ec_notify_down (void *data)
                 gf_timer_call_cancel (ec->xl->ctx, ec->timer);
                 ec->timer = NULL;
 
-                if (GF_EVENT_MAXVAL == ec_get_event_from_state (ec)) {
-                        /* Change the state as if the bricks are down */
+                event = ec_get_event_from_state (ec);
+                /* If event is still MAXVAL then enough subvolumes didn't
+                 * notify, treat it as CHILD_DOWN. */
+                if (event == GF_EVENT_MAXVAL) {
+                        event = GF_EVENT_CHILD_DOWN;
                         ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
                         ec->xl_notify_count = ec->nodes;
-                        default_notify (ec->xl, GF_EVENT_CHILD_DOWN, NULL);
+                } else if (event == GF_EVENT_CHILD_UP) {
+                        /* Rest of the bricks are still not coming up,
+                         * notify that ec is up. Files/directories will be
+                         * healed as in when they come up. */
+                        ec_up (ec->xl, ec);
                 }
+
+                /* CHILD_DOWN should not come here as no grace period is given
+                 * for notifying CHILD_DOWN. */
+
+                default_notify (ec->xl, event, NULL);
         }
 unlock:
         UNLOCK(&ec->lock);
 }
 
 void
-ec_launch_notify_child_down_timer (xlator_t *this, ec_t *ec)
+ec_launch_notify_timer (xlator_t *this, ec_t *ec)
 {
         struct timespec delay = {0, };
 
         gf_log (this->name, GF_LOG_DEBUG, "Initiating child-down timer");
         delay.tv_sec = 10;
         delay.tv_nsec = 0;
-        ec->timer = gf_timer_call_after (this->ctx, delay, ec_notify_down, ec);
+        ec->timer = gf_timer_call_after (this->ctx, delay, ec_notify_cbk, ec);
         if (ec->timer == NULL) {
                 gf_log(this->name, GF_LOG_ERROR, "Cannot create timer "
                        "for delayed initialization");
@@ -321,10 +341,10 @@ notify (xlator_t *this, int32_t event, void *data, ...)
 
         if (event == GF_EVENT_PARENT_UP) {
                 /*
-                 * Start a timer which sends CHILD_DOWN event to parent
+                 * Start a timer which sends appropriate event to parent
                  * xlator to prevent the 'mount' syscall from hanging.
                  */
-                ec_launch_notify_child_down_timer (this, ec);
+                ec_launch_notify_timer (this, ec);
                 goto unlock;
         }
author	Pranith Kumar K <pkarampu@redhat.com>	2015-02-01 15:03:46 +0530
committer	Vijay Bellur <vbellur@redhat.com>	2015-02-02 12:22:02 -0800
commit	da1ff66255017501f54c50b3c40eeea11b5fc38f (patch)
tree	e3389286e0d00a0cf7d80514236314358ec6e48a /xlators/cluster
parent	3622146ac06918069a5994ac0dfa02aa98de4d8f (diff)