diff options
author | Atin Mukherjee <amukherj@redhat.com> | 2016-07-25 19:09:08 +0530 |
---|---|---|
committer | Atin Mukherjee <amukherj@redhat.com> | 2016-08-03 21:43:34 -0700 |
commit | c3dee6d35326c6495591eb5bbf7f52f64031e2c4 (patch) | |
tree | 7a57db6876b86434255068c41479066a0cf51caa | |
parent | ea6a1ebe931e49464eb17205b94f5c87765cf696 (diff) |
glusterd: clean up old port and allocate new one on every restart
GlusterD as of now was blindly assuming that the brick port which was already
allocated would be available to be reused and that assumption is absolutely
wrong.
Solution : On first attempt, we thought GlusterD should check if the already
allocated brick ports are free, if not allocate new port and pass it to the
daemon. But with that approach there is a possibility that if PMAP_SIGNOUT is
missed out, the stale port will be given back to the clients where connection
will keep on failing. Now given the port allocation always start from base_port,
if everytime a new port has to be allocated for the daemons, the port range will
still be under control. So this fix tries to clean up old port using
pmap_registry_remove () if any and then goes for pmap_registry_alloc ()
Change-Id: If54a055d01ab0cbc06589dc1191d8fc52eb2c84f
BUG: 1221623
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: http://review.gluster.org/15005
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Avra Sengupta <asengupt@redhat.com>
-rwxr-xr-x | tests/bugs/glusterd/1313628-import-brick-ports-always.t | 47 | ||||
-rw-r--r-- | tests/bugs/snapshot/bug-1316437.t | 2 | ||||
-rw-r--r-- | tests/features/ssl-ciphers.t | 5 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-messages.h | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.c | 24 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.h | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 23 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 12 |
8 files changed, 46 insertions, 79 deletions
diff --git a/tests/bugs/glusterd/1313628-import-brick-ports-always.t b/tests/bugs/glusterd/1313628-import-brick-ports-always.t deleted file mode 100755 index d04c4293466..00000000000 --- a/tests/bugs/glusterd/1313628-import-brick-ports-always.t +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../cluster.rc - -## Check that brick ports are always copied on import -## -------------------------------------------------- -## This test checks that the brick ports are copied on import by checking that -## they don't change when the following happens, -## - Stop a volume -## - Stop glusterd -## - Start the stopped volume -## - Start the stopped glusterd - -function get_brick_port() { - local VOL=$1 - local BRICK=$2 - $CLI2 volume status $VOL $BRICK --xml | sed -ne 's/.*<port>\([0-9]*\)<\/port>/\1/p' -} - - -cleanup - -TEST launch_cluster 2 -TEST $CLI1 peer probe $H2 -EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count - -# Create and start volume so that brick port assignment happens -TEST $CLI1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 -TEST $CLI1 volume start $V0 - -# Save port for 2nd brick -BPORT_ORIG=$(get_brick_port $V0 $H2:$B2/$V0) - -# Stop volume, stop 2nd glusterd, start volume, start 2nd glusterd -TEST $CLI1 volume stop $V0 -TEST kill_glusterd 2 - -TEST $CLI1 volume start $V0 -TEST start_glusterd 2 -EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count - -# Get new port and compare with old one -EXPECT_WITHIN $PROCESS_UP_TIMEOUT $BPORT_ORIG get_brick_port $V0 $H2:$B2/$V0 - -$CLI1 volume stop $V0 - -cleanup diff --git a/tests/bugs/snapshot/bug-1316437.t b/tests/bugs/snapshot/bug-1316437.t index 6370a7a1d25..30a221e3171 100644 --- a/tests/bugs/snapshot/bug-1316437.t +++ b/tests/bugs/snapshot/bug-1316437.t @@ -28,5 +28,3 @@ glusterd EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist cleanup; -#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1359717 -#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1359717 diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t index 9ee7fc6c16f..f5909f320ac 100644 --- a/tests/features/ssl-ciphers.t +++ b/tests/features/ssl-ciphers.t @@ -137,6 +137,7 @@ EXPECT "`pwd`/`dirname $0`/dh1024.pem" volume_option $V0 ssl.dh-param TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "Y" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT # Test the cipher-list option @@ -145,6 +146,7 @@ EXPECT AES256-SHA volume_option $V0 ssl.cipher-list TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT @@ -154,6 +156,7 @@ EXPECT EECDH:EDH:!TLSv1 volume_option $V0 ssl.cipher-list TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT @@ -162,6 +165,7 @@ EXPECT invalid volume_option $V0 ssl.ec-curve TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT TEST $CLI volume set $V0 ssl.ec-curve secp521r1 @@ -169,6 +173,7 @@ EXPECT secp521r1 volume_option $V0 ssl.ec-curve TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count +BRICK_PORT=`brick_port $V0` EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT # test revocation diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index ba40b8f7628..623f4dc414e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -41,7 +41,7 @@ #define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES 578 +#define GLFS_NUM_MESSAGES 579 #define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -4673,6 +4673,14 @@ */ #define GD_MSG_DICT_GET_SUCCESS (GLUSTERD_COMP_BASE + 578) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_PMAP_REGISTRY_REMOVE_FAIL (GLUSTERD_COMP_BASE + 579) + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index 7ed03905774..6c65da79392 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -203,6 +203,29 @@ pmap_registry_alloc (xlator_t *this) return port; } +/* pmap_assign_port does a pmap_registry_remove followed by pmap_registry_alloc, + * the reason for the former is to ensure we don't end up with stale ports + */ +int +pmap_assign_port (xlator_t *this, int old_port, const char *path) +{ + int ret = -1; + int new_port = 0; + + if (old_port) { + ret = pmap_registry_remove (this, 0, path, + GF_PMAP_PORT_BRICKSERVER, NULL); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, + GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, 0, "Failed toi" + "remove pmap registry for older signin for path" + " %s", path); + } + } + new_port = pmap_registry_alloc (this); + return new_port; +} + int pmap_registry_bind (xlator_t *this, int port, const char *brickname, gf_pmap_port_type_t type, void *xprt) @@ -420,7 +443,6 @@ __gluster_pmap_signout (rpcsvc_request_t *req) req->rpc_err = GARBAGE_ARGS; goto fail; } - rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick, GF_PMAP_PORT_BRICKSERVER, req->trans); diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h index 95ded04208d..14187daee2b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.h +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h @@ -35,6 +35,8 @@ struct pmap_registry { struct pmap_port_status ports[65536]; }; +int pmap_assign_port (xlator_t *this, int port, const char *path); +int pmap_mark_port_leased (xlator_t *this, int port); int pmap_registry_alloc (xlator_t *this); int pmap_registry_bind (xlator_t *this, int port, const char *brickname, gf_pmap_port_type_t type, void *xprt); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c index 830dc1a706d..36e4a196845 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c @@ -295,28 +295,7 @@ glusterd_snapdsvc_start (glusterd_svc_t *svc, int flags) "--brick-name", snapd_id, "-S", svc->conn.sockpath, NULL); - /* Do a pmap registry remove on the older connected port */ - if (volinfo->snapd.port) { - ret = pmap_registry_remove (this, volinfo->snapd.port, - snapd_id, GF_PMAP_PORT_BRICKSERVER, - NULL); - if (ret) { - snprintf (msg, sizeof (msg), "Failed to remove pmap " - "registry for older signin"); - goto out; - } - } - - snapd_port = pmap_registry_alloc (THIS); - if (!snapd_port) { - snprintf (msg, sizeof (msg), "Could not allocate port " - "for snapd service for volume %s", - volinfo->volname); - runner_log (&runner, this->name, GF_LOG_DEBUG, msg); - ret = -1; - goto out; - } - + snapd_port = pmap_assign_port (THIS, volinfo->snapd.port, snapd_id); volinfo->snapd.port = snapd_port; runner_add_arg (&runner, "--brick-port"); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index a03b041a4e8..20aba127d67 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1786,6 +1786,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, char socketpath[PATH_MAX] = {0}; char glusterd_uuid[1024] = {0,}; char valgrind_logfile[PATH_MAX] = {0}; + char rdma_brick_path[PATH_MAX] = {0,}; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1818,9 +1819,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, if (gf_is_service_running (pidfile, NULL)) goto connect; - port = brickinfo->port; - if (!port) - port = pmap_registry_alloc (THIS); + port = pmap_assign_port (THIS, brickinfo->port, brickinfo->path); /* Build the exp_path, before starting the glusterfsd even in valgrind mode. Otherwise all the glusterfsd processes start @@ -1885,9 +1884,10 @@ retry: if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { runner_argprintf (&runner, "%d", port); } else { - rdma_port = brickinfo->rdma_port; - if (!rdma_port) - rdma_port = pmap_registry_alloc (THIS); + snprintf (rdma_brick_path, sizeof(rdma_brick_path), "%s.rdma", + brickinfo->path); + rdma_port = pmap_assign_port (THIS, brickinfo->rdma_port, + rdma_brick_path); runner_argprintf (&runner, "%d,%d", port, rdma_port); runner_add_arg (&runner, "--xlator-option"); runner_argprintf (&runner, "%s-server.transport.rdma.listen-port=%d", |