summaryrefslogtreecommitdiffstats
path: root/rpc/rpc-transport/socket/src/socket.c
diff options
context:
space:
mode:
author: Jeff Darcy <jdarcy@redhat.com> 2017-02-01 22:00:32 -0500
committer: Shyamsundar Ranganathan <srangana@redhat.com> 2017-02-02 15:21:36 -0500
commit: 5a57c1592a34ee6632ca1fb38e076dde381d1ae2 (patch)
tree: f06422d16a8b89313202b10f673df70bd72c69fb /rpc/rpc-transport/socket/src/socket.c
parent: ae47befebeda2de5fd2d706090cbacf4ef60c785 (diff)
socket: retry connect immediately if it fails
Previously we relied on a complex dance of setting flags, shutting down the socket, tearing stuff down, getting an event, tearing more stuff down, and waiting for a higher-level retry. What we really need, in the case where we're just trying to connect prematurely, e.g. to a brick that hasn't fully come up yet, is a simple retry of the connect(2) call.

This was discovered by observing failures in ec-new-entry.t with multiplexing enabled, but probably fixes other random failures as well.

Change-Id: Ibedb8942060bccc96b02272a333c3002c9b77d4c
BUG: 1385758
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: https://review.gluster.org/16510
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Diffstat (limited to 'rpc/rpc-transport/socket/src/socket.c')
-rw-r--r--rpc/rpc-transport/socket/src/socket.c38
1 files changed, 36 insertions, 2 deletions
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 4b1505c4eef..990571289c1 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -2944,6 +2944,33 @@ socket_fix_ssl_opts (rpc_transport_t *this, socket_private_t *priv,
}
}
+/*
+ * Call connect(2) on sockfd, retrying for as long as it fails with ENOENT.
+ *
+ * If we might just be trying to connect prematurely, e.g. to a brick that's
+ * slow coming up, all we need is a simple retry. Don't worry about sleeping
+ * in some arbitrary thread. The connect(2) could already have the exact same
+ * effect, and we deal with it in that case so we can deal with it for sleep(3)
+ * as well.
+ *
+ * sockfd, addr and addrlen are passed unchanged to every connect(2) attempt.
+ *
+ * At most five attempts are made, with a one-second sleep between consecutive
+ * attempts, so a caller can spend up to four seconds sleeping in here. Any
+ * failure other than ENOENT ends the loop immediately, without a retry.
+ *
+ * Returns the result of the last connect(2) call: a non-negative value on
+ * success, otherwise that call's failure value. Nothing else is called
+ * between the last connect(2) and the return, so on failure errno still
+ * describes that final attempt.
+ */
+static int
+connect_loop (int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+{
+ int ret;
+ int connect_fails = 0; /* number of ENOENT failures seen so far */
+
+ for (;;) {
+ ret = connect (sockfd, addr, addrlen);
+ if (ret >= 0) {
+ break; /* connected */
+ }
+ /* Give up on any other error, or on the fifth ENOENT in a row. */
+ if ((errno != ENOENT) || (++connect_fails >= 5)) {
+ break;
+ }
+ sleep (1); /* give the peer a moment before the next attempt */
+ }
+
+ return ret;
+}
+
static int
socket_connect (rpc_transport_t *this, int port)
{
@@ -3105,8 +3132,15 @@ socket_connect (rpc_transport_t *this, int port)
}
}
- ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
- this->peerinfo.sockaddr_len);
+ if (ign_enoent) {
+ ret = connect_loop (priv->sock,
+ SA (&this->peerinfo.sockaddr),
+ this->peerinfo.sockaddr_len);
+ } else {
+ ret = connect (priv->sock,
+ SA (&this->peerinfo.sockaddr),
+ this->peerinfo.sockaddr_len);
+ }
if (ret == -1 && errno == ENOENT && ign_enoent) {
gf_log (this->name, GF_LOG_WARNING,