diff options
-rw-r--r-- | tests/bugs/glusterd/bug-1451248-mux-reboot-node.t | 54 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 6 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 18 |
4 files changed, 79 insertions, 0 deletions
diff --git a/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t new file mode 100644 index 00000000000..5d8ce6e75e6 --- /dev/null +++ b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t @@ -0,0 +1,54 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../traps.rc +. $(dirname $0)/../../volume.rc + +function count_up_bricks { + $CLI --xml volume status all | grep '<status>1' | wc -l +} + +function count_brick_processes { + pgrep glusterfsd | wc -l +} + +function count_brick_pids { + $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ + | grep -v "N/A" | sort | uniq | wc -l +} + +cleanup; + +TEST glusterd +TEST $CLI volume set all cluster.brick-multiplex on +push_trapfunc "$CLI volume set all cluster.brick-multiplex off" +push_trapfunc "cleanup" + +TEST $CLI volume create $V0 $H0:$B0/brick{0..2} +TEST $CLI volume start $V0 + +EXPECT 1 count_brick_processes +EXPECT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks + +pkill gluster +TEST glusterd + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks + +pkill glusterd +TEST glusterd + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks + +TEST $CLI volume create $V1 $H0:$B0/brick{3..5} +TEST $CLI volume start $V1 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks + diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 2a5772b6669..f16bc20c01f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -5811,7 +5811,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_set_connected (&rpc->conn); gf_msg_debug (this->name, 0, "Connected to %s:%s", brickinfo->hostname, brickinfo->path); + glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); + brickinfo->started_here = _gf_true; + gf_event (EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s", brickinfo->hostname, volinfo->volname, brickinfo->path); @@ -5841,6 +5844,9 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, GD_MSG_BRICK_DISCONNECTED, "Brick %s:%s has disconnected from glusterd.", brickinfo->hostname, brickinfo->path); + + brickinfo->started_here = _gf_false; + ret = get_volinfo_from_brickid (brickid, &volinfo); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index c3f5dbc3cd1..2b60b17841b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -563,6 +563,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req) GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); sys_unlink (pidfile); + brickinfo->started_here = _gf_false; } } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 47ac842193e..ea8d60cd87b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2143,6 +2143,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile); (void) sys_unlink (pidfile); + + brickinfo->started_here = _gf_false; out: return ret; } @@ -5170,6 +5172,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, glusterd_brickinfo_t *other_brick; char pidfile2[PATH_MAX] = {0}; int32_t pid2 = -1; + int16_t retries = 15; /* * If comp_vol is provided, we have to check *volume* compatibility @@ -5212,8 +5215,22 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) { continue; } + GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick, conf); + + /* It is possible that the pidfile hasn't yet been populated, + * when bricks are started in "no-wait" mode; for example + * when bricks are started by glusterd_restart_bricks(). So + * wait for the pidfile to be populated with a value before + * checking if the service is running */ + while (retries > 0) { + if (sys_access (pidfile2, F_OK) == 0) + break; + sleep (1); + retries--; + } + if (!gf_is_service_running (pidfile2, &pid2)) { gf_log (this->name, GF_LOG_INFO, "cleaning up dead brick %s:%s", @@ -5457,6 +5474,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, socketpath, brickinfo->path, volinfo->volname); (void) glusterd_brick_connect (volinfo, brickinfo, socketpath); + brickinfo->started_here = _gf_true; } return 0; } |