diff options
| author | Jeff Darcy <jdarcy@redhat.com> | 2017-02-02 13:08:04 -0500 | 
|---|---|---|
| committer | Atin Mukherjee <amukherj@redhat.com> | 2017-02-02 23:06:09 -0500 | 
| commit | b4aac5d334d5102ef3b935a200502cfcd63bd12c (patch) | |
| tree | 53d15581285af59258d366a9d9507caeec559e6f | |
| parent | 1e849f616f6ae2f97ab307abaff69182c87571ca (diff) | |
glusterd: double-check brick liveness for remove-brick validation
Same problem as https://review.gluster.org/#/c/16509/ in a different
place.  Tests detach bricks without glusterd's knowledge, so
glusterd's internal brick state is out of date and we have to re-check
(via the brick's pidfile) as well.
BUG: 1385758
Change-Id: I169538c1c62d72a685a49d57ef65fb6c3db6eab2
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: https://review.gluster.org/16529
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
| -rw-r--r-- | tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t | 6 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 24 | 
2 files changed, 24 insertions, 6 deletions
| diff --git a/tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t b/tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t index eca1c1a3723..47403b44ee6 100644 --- a/tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t +++ b/tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t @@ -12,7 +12,8 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2  TEST $CLI volume start $V0  #kill a brick process -kill -15 `cat $GLUSTERD_WORKDIR/vols/$V0/run/$H0-d-backends-${V0}1.pid`; +kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1  #remove-brick start should fail as the brick is down  TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start @@ -26,7 +27,8 @@ TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start  EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}1"  #kill a brick process -kill -15 `cat $GLUSTERD_WORKDIR/vols/$V0/run/$H0-d-backends-${V0}1.pid`; +kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1  #remove-brick commit should pass even if the brick is down  TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index c78fbd8345c..7b5264ad8a1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1947,6 +1947,8 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,          glusterd_peerinfo_t    *peerinfo    = NULL;          int                     i           = 0;          int                     ret         = -1; +        char                    pidfile[PATH_MAX+1] = {0,}; +        glusterd_conf_t        *priv        = THIS->private;          /* Check whether all the nodes of the bricks to be removed are          * up, if not fail the operation */ @@ -2008,16 +2010,30 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,                  }                  if (glusterd_is_local_brick (THIS, volinfo, brickinfo)) { -                        if (((cmd == GF_OP_CMD_START) || -                            (cmd == GF_OP_CMD_DETACH_START) || -                            (cmd == GF_DEFRAG_CMD_DETACH_START))  && -                            brickinfo->status != GF_BRICK_STARTED) { +                        switch (cmd) { +                        case GF_OP_CMD_START: +                        case GF_OP_CMD_DETACH_START: +                        case GF_DEFRAG_CMD_DETACH_START: +                                break; +                        default: +                                continue; +                        } +                        if (brickinfo->status != GF_BRICK_STARTED) {                                  snprintf (msg, sizeof (msg), "Found stopped "                                            "brick %s", brick);                                  *errstr = gf_strdup (msg);                                  ret = -1;                                  goto out;                          } +                        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, +                                                    brickinfo, priv); +                        if (!gf_is_service_running (pidfile, NULL)) { +                                snprintf (msg, sizeof (msg), "Found dead " +                                          "brick %s", brick); +                                *errstr = gf_strdup (msg); +                                ret = -1; +                                goto out; +                        }                          continue;                  } | 
