diff options
author | Atin Mukherjee <amukherj@redhat.com> | 2016-07-25 19:09:08 +0530 |
---|---|---|
committer | Niels de Vos <ndevos@redhat.com> | 2016-11-23 02:37:16 -0800 |
commit | 394c654cd26f232ed493442a5858017be0518b28 (patch) | |
tree | 5a1fb5587d60c40fbe7562545dd92c46936f2a6b /xlators/mgmt | |
parent | 484b72177a035c5882c82796ca186d0698947e86 (diff) |
glusterd: clean up old port and allocate new one on every restart
Backport of http://review.gluster.org/#/c/15005/9.
Until now, GlusterD blindly assumed that a brick port which was
already allocated would be available for reuse, and that assumption
is absolutely wrong.
Solution: At first, we thought GlusterD should check whether the
already allocated brick ports are free and, if not, allocate a new port
and pass it to the daemon. But with that approach there is a possibility
that, if PMAP_SIGNOUT is missed, the stale port will be given back
to the clients, whose connections will keep on failing. Now, given that
port allocation always starts from base_port, even if a new port
has to be allocated for the daemons every time, the port range will still
be under control. So this fix tries to clean up the old port, if any, using
pmap_registry_remove () and then calls pmap_registry_alloc ().
This patch is being ported to the 3.8 branch because the brick process
blindly re-using the old port, without registering with the pmap server,
causes the snapd daemon to not start properly, even though snapd registers
with the pmap server. With this patch, all the brick processes and
snapd will register with the pmap server to get either the same port
or a new port, and avoid port collisions.
> Reviewed-on: http://review.gluster.org/15005
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Avra Sengupta <asengupt@redhat.com>
(cherry picked from commit c3dee6d35326c6495591eb5bbf7f52f64031e2c4)
Change-Id: If54a055d01ab0cbc06589dc1191d8fc52eb2c84f
BUG: 1369766
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: http://review.gluster.org/15308
Tested-by: Avra Sengupta <asengupt@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Diffstat (limited to 'xlators/mgmt')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-messages.h | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.c | 24 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.h | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 23 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 12 |
5 files changed, 41 insertions, 30 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index ba40b8f7628..623f4dc414e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -41,7 +41,7 @@ #define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES 578 +#define GLFS_NUM_MESSAGES 579 #define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -4673,6 +4673,14 @@ */ #define GD_MSG_DICT_GET_SUCCESS (GLUSTERD_COMP_BASE + 578) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_PMAP_REGISTRY_REMOVE_FAIL (GLUSTERD_COMP_BASE + 579) + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index 6a89a4fe6e3..fd1936db301 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -203,6 +203,29 @@ pmap_registry_alloc (xlator_t *this) return port; } +/* pmap_assign_port does a pmap_registry_remove followed by pmap_registry_alloc, + * the reason for the former is to ensure we don't end up with stale ports + */ +int +pmap_assign_port (xlator_t *this, int old_port, const char *path) +{ + int ret = -1; + int new_port = 0; + + if (old_port) { + ret = pmap_registry_remove (this, 0, path, + GF_PMAP_PORT_BRICKSERVER, NULL); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, + GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, 0, "Failed toi" + "remove pmap registry for older signin for path" + " %s", path); + } + } + new_port = pmap_registry_alloc (this); + return new_port; +} + int pmap_registry_bind (xlator_t *this, int port, const char *brickname, gf_pmap_port_type_t type, void *xprt) @@ -452,7 +475,6 @@ __gluster_pmap_signout (rpcsvc_request_t *req) req->rpc_err = GARBAGE_ARGS; goto fail; } - rsp.op_ret = pmap_registry_remove 
(THIS, args.port, args.brick, GF_PMAP_PORT_BRICKSERVER, req->trans); diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h index 95ded04208d..14187daee2b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.h +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h @@ -35,6 +35,8 @@ struct pmap_registry { struct pmap_port_status ports[65536]; }; +int pmap_assign_port (xlator_t *this, int port, const char *path); +int pmap_mark_port_leased (xlator_t *this, int port); int pmap_registry_alloc (xlator_t *this); int pmap_registry_bind (xlator_t *this, int port, const char *brickname, gf_pmap_port_type_t type, void *xprt); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c index 830dc1a706d..36e4a196845 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c @@ -295,28 +295,7 @@ glusterd_snapdsvc_start (glusterd_svc_t *svc, int flags) "--brick-name", snapd_id, "-S", svc->conn.sockpath, NULL); - /* Do a pmap registry remove on the older connected port */ - if (volinfo->snapd.port) { - ret = pmap_registry_remove (this, volinfo->snapd.port, - snapd_id, GF_PMAP_PORT_BRICKSERVER, - NULL); - if (ret) { - snprintf (msg, sizeof (msg), "Failed to remove pmap " - "registry for older signin"); - goto out; - } - } - - snapd_port = pmap_registry_alloc (THIS); - if (!snapd_port) { - snprintf (msg, sizeof (msg), "Could not allocate port " - "for snapd service for volume %s", - volinfo->volname); - runner_log (&runner, this->name, GF_LOG_DEBUG, msg); - ret = -1; - goto out; - } - + snapd_port = pmap_assign_port (THIS, volinfo->snapd.port, snapd_id); volinfo->snapd.port = snapd_port; runner_add_arg (&runner, "--brick-port"); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e89ed1102fe..e7ae9b7848d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ 
b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1794,6 +1794,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, char socketpath[PATH_MAX] = {0}; char glusterd_uuid[1024] = {0,}; char valgrind_logfile[PATH_MAX] = {0}; + char rdma_brick_path[PATH_MAX] = {0,}; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1826,9 +1827,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, if (gf_is_service_running (pidfile, NULL)) goto connect; - port = brickinfo->port; - if (!port) - port = pmap_registry_alloc (THIS); + port = pmap_assign_port (THIS, brickinfo->port, brickinfo->path); /* Build the exp_path, before starting the glusterfsd even in valgrind mode. Otherwise all the glusterfsd processes start @@ -1893,9 +1892,10 @@ retry: if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { runner_argprintf (&runner, "%d", port); } else { - rdma_port = brickinfo->rdma_port; - if (!rdma_port) - rdma_port = pmap_registry_alloc (THIS); + snprintf (rdma_brick_path, sizeof(rdma_brick_path), "%s.rdma", + brickinfo->path); + rdma_port = pmap_assign_port (THIS, brickinfo->rdma_port, + rdma_brick_path); runner_argprintf (&runner, "%d,%d", port, rdma_port); runner_add_arg (&runner, "--xlator-option"); runner_argprintf (&runner, "%s-server.transport.rdma.listen-port=%d", |