From 72452e07fcf91627afb6d45b921dfefd2610686f Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Wed, 1 Feb 2017 21:54:30 -0500 Subject: glusterd: double-check whether brick is alive for stats With multiplexing, our tests detach bricks from their host processes without glusterd being involved. Thus, when we ask glusterd to fetch profile info, it will try to fetch from a brick that's actually not present any more. While it can handle the process being dead and its RPC connection being closed, it fails if it gets a negative response from a live brick process. This is not a problem in normal use, because the brick can't disappear without glusterd seeing it. The fix is to double-check that the brick is actually running, by looking for its pidfile, which the tests *do* clean up as part of killing a brick. Change-Id: I098465b175ecf23538bd7207357c752a2bba8f4e BUG: 1385758 Signed-off-by: Jeff Darcy Reviewed-on: https://review.gluster.org/16509 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Atin Mukherjee --- xlators/mgmt/glusterd/src/glusterd-op-sm.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index d9b18e00195..6bc01f702cc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -6315,15 +6315,14 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr, glusterd_brickinfo_t *brickinfo = NULL; glusterd_pending_node_t *pending_node = NULL; char *brick = NULL; - - + int32_t pid = -1; + char pidfile[PATH_MAX] = {0}; this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - ret = dict_get_str (dict, "volname", &volname); if (ret) { gf_msg ("glusterd", GF_LOG_ERROR, 0, @@ -6383,6 +6382,18 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr, cds_list_for_each_entry (brickinfo, 
&volinfo->bricks, brick_list) { if (glusterd_is_brick_started (brickinfo)) { + /* + * In normal use, glusterd_is_brick_started + * will give us the answer we need. However, + * in our tests the brick gets detached behind + * our back, so we need to double-check this + * way. + */ + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, + brickinfo, priv); + if (!gf_is_service_running (pidfile, &pid)) { + continue; + } pending_node = GF_CALLOC (1, sizeof (*pending_node), gf_gld_mt_pending_node_t); if (!pending_node) { -- cgit