summaryrefslogtreecommitdiffstats
path: root/rpc/rpc-transport/socket/src
diff options
context:
space:
mode:
Diffstat (limited to 'rpc/rpc-transport/socket/src')
-rw-r--r--rpc/rpc-transport/socket/src/Makefile.am2
-rw-r--r--rpc/rpc-transport/socket/src/name.c30
-rw-r--r--rpc/rpc-transport/socket/src/socket.c531
-rw-r--r--rpc/rpc-transport/socket/src/socket.h19
4 files changed, 428 insertions, 154 deletions
diff --git a/rpc/rpc-transport/socket/src/Makefile.am b/rpc/rpc-transport/socket/src/Makefile.am
index 5f6dfe688..71e6ed6ff 100644
--- a/rpc/rpc-transport/socket/src/Makefile.am
+++ b/rpc/rpc-transport/socket/src/Makefile.am
@@ -3,7 +3,7 @@ noinst_HEADERS = socket.h name.h
rpctransport_LTLIBRARIES = socket.la
rpctransportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
-socket_la_LDFLAGS = -module -avoidversion
+socket_la_LDFLAGS = -module -avoid-version
socket_la_SOURCES = socket.c name.c
socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -lssl
diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
index d37c83e18..1647d5b6b 100644
--- a/rpc/rpc-transport/socket/src/name.c
+++ b/rpc/rpc-transport/socket/src/name.c
@@ -15,12 +15,6 @@
#include <netdb.h>
#include <string.h>
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
-
#ifndef AF_INET_SDP
#define AF_INET_SDP 27
#endif
@@ -40,9 +34,17 @@ static int32_t
af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
socklen_t sockaddr_len, int ceiling)
{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
+ int32_t ret = -1;
+ uint16_t port = ceiling - 1;
+ // by default assume none of the ports are blocked and all are available
+ gf_boolean_t ports[1024] = {_gf_false,};
+ int i = 0;
+
+ ret = gf_process_reserved_ports (ports);
+ if (ret != 0) {
+ for (i = 0; i < 1024; i++)
+ ports[i] = _gf_false;
+ }
while (port)
{
@@ -57,7 +59,11 @@ af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
break;
}
-
+ // ignore the reserved ports
+ if (ports[port] == _gf_true) {
+ port--;
+ continue;
+ }
ret = bind (fd, sockaddr, sockaddr_len);
if (ret == 0)
@@ -440,12 +446,12 @@ client_bind (rpc_transport_t *this,
case AF_INET6:
if (!this->bind_insecure) {
ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len, CLIENT_PORT_CEILING);
+ *sockaddr_len, GF_CLIENT_PORT_CEILING);
}
if (ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"cannot bind inet socket (%d) to port less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
+ sock, GF_CLIENT_PORT_CEILING, strerror (errno));
ret = 0;
}
break;
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index b043f52ef..93da3f296 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -147,19 +147,14 @@ typedef int SSL_trinary_func (SSL *, void *, int);
&in->pending_vector, \
&in->pending_count, \
&bytes_read); \
- if (ret == -1) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "reading from socket failed. Error (%s), " \
- "peer (%s)", strerror (errno), \
- this->peerinfo.identifier); \
+ if (ret == -1) \
break; \
- } \
__socket_proto_update_priv_after_read (priv, ret, bytes_read); \
}
-int socket_init (rpc_transport_t *this);
+static int socket_init (rpc_transport_t *this);
-void
+static void
ssl_dump_error_stack (const char *caller)
{
unsigned long errnum = 0;
@@ -173,7 +168,7 @@ ssl_dump_error_stack (const char *caller)
}
}
-int
+static int
ssl_do (rpc_transport_t *this, void *buf, size_t len, SSL_trinary_func *func)
{
int r = (-1);
@@ -244,7 +239,7 @@ out:
#define ssl_read_one(t,b,l) ssl_do((t),(b),(l),(SSL_trinary_func *)SSL_read)
#define ssl_write_one(t,b,l) ssl_do((t),(b),(l),(SSL_trinary_func *)SSL_write)
-int
+static int
ssl_setup_connection (rpc_transport_t *this, int server)
{
X509 *peer = NULL;
@@ -303,10 +298,139 @@ ssl_error:
ssl_dump_error_stack(this->name);
free_ssl:
SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
done:
return ret;
}
+
+static void
+ssl_teardown_connection (socket_private_t *priv)
+{
+ SSL_shutdown(priv->ssl_ssl);
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+}
+
+
+static ssize_t
+__socket_ssl_readv (rpc_transport_t *this, struct iovec *opvector, int opcount)
+{
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ int ret = -1;
+
+ priv = this->private;
+ sock = priv->sock;
+
+ if (priv->use_ssl) {
+ ret = ssl_read_one (this, opvector->iov_base, opvector->iov_len);
+ } else {
+ ret = readv (sock, opvector, opcount);
+ }
+
+ return ret;
+}
+
+
+static ssize_t
+__socket_ssl_read (rpc_transport_t *this, void *buf, size_t count)
+{
+ struct iovec iov = {0, };
+ int ret = -1;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = __socket_ssl_readv (this, &iov, 1);
+
+ return ret;
+}
+
+
+static int
+__socket_cached_read (rpc_transport_t *this, struct iovec *opvector, int opcount)
+{
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ struct gf_sock_incoming *in = NULL;
+ int req_len = -1;
+ int ret = -1;
+
+ priv = this->private;
+ sock = priv->sock;
+ in = &priv->incoming;
+ req_len = iov_length (opvector, opcount);
+
+ if (in->record_state == SP_STATE_READING_FRAGHDR) {
+ in->ra_read = 0;
+ in->ra_served = 0;
+ in->ra_max = 0;
+ in->ra_buf = NULL;
+ goto uncached;
+ }
+
+ if (!in->ra_max) {
+ /* first call after passing SP_STATE_READING_FRAGHDR */
+ in->ra_max = min (RPC_FRAGSIZE (in->fraghdr), GF_SOCKET_RA_MAX);
+ /* Note that the in->iobuf is the primary iobuf into which
+ headers are read into. By using this itself as our
+ read-ahead cache, we can avoid memory copies in iov_load
+ */
+ in->ra_buf = iobuf_ptr (in->iobuf);
+ }
+
+ /* fill read-ahead */
+ if (in->ra_read < in->ra_max) {
+ ret = __socket_ssl_read (this, &in->ra_buf[in->ra_read],
+ (in->ra_max - in->ra_read));
+ if (ret > 0)
+ in->ra_read += ret;
+
+ /* we proceed to test if there is still cached data to
+ be served even if readahead could not progress */
+ }
+
+ /* serve cached */
+ if (in->ra_served < in->ra_read) {
+ ret = iov_load (opvector, opcount, &in->ra_buf[in->ra_served],
+ min (req_len, (in->ra_read - in->ra_served)));
+
+ in->ra_served += ret;
+ /* Do not read uncached and cached in the same call */
+ goto out;
+ }
+
+ if (in->ra_read < in->ra_max)
+ /* If there was no cached data to be served, (and we are
+ guaranteed to have already performed an attempt to progress
+ readahead above), and we have not yet read out the full
+ readahead capacity, then bail out for now without doing
+ the uncached read below (as that will overtake future cached
+ read)
+ */
+ goto out;
+uncached:
+ ret = __socket_ssl_readv (this, opvector, opcount);
+out:
+ return ret;
+}
+
+static gf_boolean_t
+__does_socket_rwv_error_need_logging (socket_private_t *priv, int write)
+{
+ int read = !write;
+
+ if (priv->connected == -1) /* Didn't even connect, of course it fails */
+ return _gf_false;
+
+ if (read && (priv->read_fail_log == _gf_false))
+ return _gf_false;
+
+ return _gf_true;
+}
+
/*
* return value:
* 0 = success (completed)
@@ -314,7 +438,7 @@ done:
* > 0 = incomplete
*/
-int
+static int
__socket_rwv (rpc_transport_t *this, struct iovec *vector, int count,
struct iovec **pending_vector, int *pending_count, size_t *bytes,
int write)
@@ -362,13 +486,8 @@ __socket_rwv (rpc_transport_t *this, struct iovec *vector, int count,
}
this->total_bytes_write += ret;
} else {
- if (priv->use_ssl) {
- ret = ssl_read_one(this,
- opvector->iov_base, opvector->iov_len);
- }
- else {
- ret = readv (sock, opvector, opcount);
- }
+ ret = __socket_cached_read (this, opvector, opcount);
+
if (ret == 0) {
gf_log(this->name,GF_LOG_DEBUG,"EOF on socket");
errno = ENODATA;
@@ -394,9 +513,15 @@ __socket_rwv (rpc_transport_t *this, struct iovec *vector, int count,
if (errno == EINTR)
continue;
- gf_log (this->name, GF_LOG_WARNING,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
+ if (__does_socket_rwv_error_need_logging (priv,
+ write)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s on %s failed (%s)",
+ write ? "writev":"readv",
+ this->peerinfo.identifier,
+ strerror (errno));
+ }
+
if (priv->use_ssl) {
ssl_dump_error_stack(this->name);
}
@@ -447,7 +572,7 @@ out:
}
-int
+static int
__socket_readv (rpc_transport_t *this, struct iovec *vector, int count,
struct iovec **pending_vector, int *pending_count,
size_t *bytes)
@@ -461,7 +586,7 @@ __socket_readv (rpc_transport_t *this, struct iovec *vector, int count,
}
-int
+static int
__socket_writev (rpc_transport_t *this, struct iovec *vector, int count,
struct iovec **pending_vector, int *pending_count)
{
@@ -474,41 +599,56 @@ __socket_writev (rpc_transport_t *this, struct iovec *vector, int count,
}
-int
+static int
+__socket_shutdown (rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = this->private;
+
+ priv->connected = -1;
+ ret = shutdown (priv->sock, SHUT_RDWR);
+ if (ret) {
+ /* its already disconnected.. no need to understand
+ why it failed to shutdown in normal cases */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "shutdown() returned %d. %s",
+ ret, strerror (errno));
+ }
+
+ return ret;
+}
+
+static int
__socket_disconnect (rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
int ret = -1;
+ socket_private_t *priv = NULL;
GF_VALIDATE_OR_GOTO ("socket", this, out);
GF_VALIDATE_OR_GOTO ("socket", this->private, out);
priv = this->private;
+ gf_log (this->name, GF_LOG_TRACE,
+ "disconnecting %p, state=%u gen=%u sock=%d", this,
+ priv->ot_state, priv->ot_gen, priv->sock);
+
if (priv->sock != -1) {
- priv->connected = -1;
- ret = shutdown (priv->sock, SHUT_RDWR);
- if (ret) {
- /* its already disconnected.. no need to understand
- why it failed to shutdown in normal cases */
- gf_log (this->name, GF_LOG_DEBUG,
- "shutdown() returned %d. %s",
- ret, strerror (errno));
- }
- if (priv->use_ssl) {
- SSL_shutdown(priv->ssl_ssl);
- SSL_clear(priv->ssl_ssl);
- SSL_free(priv->ssl_ssl);
- }
+ ret = __socket_shutdown(this);
if (priv->own_thread) {
- /*
- * Without this, reconnect (= disconnect + connect)
- * won't work except by accident.
- */
- close(priv->sock);
- priv->sock = -1;
- ++(priv->socket_gen);
- }
+ /*
+ * Without this, reconnect (= disconnect + connect)
+ * won't work except by accident.
+ */
+ close(priv->sock);
+ priv->sock = -1;
+ gf_log (this->name, GF_LOG_TRACE,
+ "OT_PLEASE_DIE on %p", this);
+ priv->ot_state = OT_PLEASE_DIE;
+ }
+ else if (priv->use_ssl) {
+ ssl_teardown_connection(priv);
+ }
}
out:
@@ -516,7 +656,7 @@ out:
}
-int
+static int
__socket_server_bind (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -572,7 +712,7 @@ out:
}
-int
+static int
__socket_nonblock (int fd)
{
int flags = 0;
@@ -586,7 +726,7 @@ __socket_nonblock (int fd)
return ret;
}
-int
+static int
__socket_nodelay (int fd)
{
int on = 1;
@@ -662,7 +802,7 @@ err:
}
-int
+static int
__socket_connect_finish (int fd)
{
int ret = -1;
@@ -680,7 +820,7 @@ __socket_connect_finish (int fd)
}
-void
+static void
__socket_reset (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -717,13 +857,13 @@ out:
}
-void
+static void
socket_set_lastfrag (uint32_t *fragsize) {
(*fragsize) |= 0x80000000U;
}
-void
+static void
socket_set_frag_header_size (uint32_t size, char *haddr)
{
size = htonl (size);
@@ -731,14 +871,14 @@ socket_set_frag_header_size (uint32_t size, char *haddr)
}
-void
+static void
socket_set_last_frag_header_size (uint32_t size, char *haddr)
{
socket_set_lastfrag (&size);
socket_set_frag_header_size (size, haddr);
}
-struct ioq *
+static struct ioq *
__socket_ioq_new (rpc_transport_t *this, rpc_transport_msg_t *msg)
{
struct ioq *entry = NULL;
@@ -805,7 +945,7 @@ out:
}
-void
+static void
__socket_ioq_entry_free (struct ioq *entry)
{
GF_VALIDATE_OR_GOTO ("socket", entry, out);
@@ -822,7 +962,7 @@ out:
}
-void
+static void
__socket_ioq_flush (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -843,7 +983,7 @@ out:
}
-int
+static int
__socket_ioq_churn_entry (rpc_transport_t *this, struct ioq *entry, int direct)
{
int ret = -1;
@@ -877,7 +1017,7 @@ __socket_ioq_churn_entry (rpc_transport_t *this, struct ioq *entry, int direct)
}
-int
+static int
__socket_ioq_churn (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -910,7 +1050,7 @@ out:
}
-int
+static int
socket_event_poll_err (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -935,7 +1075,7 @@ out:
}
-int
+static int
socket_event_poll_out (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -965,7 +1105,7 @@ out:
}
-inline int
+static inline int
__socket_read_simple_msg (rpc_transport_t *this)
{
int ret = 0;
@@ -1034,7 +1174,7 @@ out:
}
-inline int
+static inline int
__socket_read_simple_request (rpc_transport_t *this)
{
return __socket_read_simple_msg (this);
@@ -1051,7 +1191,7 @@ __socket_read_simple_request (rpc_transport_t *this)
#define rpc_progver_addr(buf) (buf + RPC_MSGTYPE_SIZE + 8)
#define rpc_procnum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 12)
-inline int
+static inline int
__socket_read_vectored_request (rpc_transport_t *this, rpcsvc_vector_sizer vector_sizer)
{
socket_private_t *priv = NULL;
@@ -1223,7 +1363,7 @@ out:
return ret;
}
-inline int
+static inline int
__socket_read_request (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -1273,7 +1413,7 @@ __socket_read_request (rpc_transport_t *this)
buf = rpc_procnum_addr (iobuf_ptr (in->iobuf));
procnum = ntoh32 (*((uint32_t *)buf));
- if (this->listener) {
+ if (priv->is_server) {
/* this check is needed as rpcsvc and rpc-clnt
* actor structures are not same */
vector_sizer =
@@ -1304,7 +1444,7 @@ out:
}
-inline int
+static inline int
__socket_read_accepted_successful_reply (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -1431,7 +1571,7 @@ out:
#define rpc_reply_verflen_addr(fragcurrent) ((char *)fragcurrent - 4)
#define rpc_reply_accept_status_addr(fragcurrent) ((char *)fragcurrent - 4)
-inline int
+static inline int
__socket_read_accepted_reply (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -1527,7 +1667,7 @@ out:
}
-inline int
+static inline int
__socket_read_denied_reply (rpc_transport_t *this)
{
return __socket_read_simple_msg (this);
@@ -1537,7 +1677,7 @@ __socket_read_denied_reply (rpc_transport_t *this)
#define rpc_reply_status_addr(fragcurrent) ((char *)fragcurrent - 4)
-inline int
+static inline int
__socket_read_vectored_reply (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -1603,7 +1743,7 @@ out:
}
-inline int
+static inline int
__socket_read_simple_reply (rpc_transport_t *this)
{
return __socket_read_simple_msg (this);
@@ -1611,7 +1751,7 @@ __socket_read_simple_reply (rpc_transport_t *this)
#define rpc_xid_addr(buf) (buf)
-inline int
+static inline int
__socket_read_reply (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -1681,7 +1821,7 @@ out:
/* returns the number of bytes yet to be read in a fragment */
-inline int
+static inline int
__socket_read_frag (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -1750,7 +1890,7 @@ out:
}
-inline
+static inline
void __socket_reset_priv (socket_private_t *priv)
{
struct gf_sock_incoming *in = NULL;
@@ -1779,7 +1919,7 @@ void __socket_reset_priv (socket_private_t *priv)
}
-int
+static int
__socket_proto_state_machine (rpc_transport_t *this,
rpc_transport_pollin_t **pollin)
{
@@ -1820,17 +1960,8 @@ __socket_proto_state_machine (rpc_transport_t *this,
&in->pending_vector,
&in->pending_count,
NULL);
- if (ret == -1) {
- if (priv->read_fail_log == 1) {
- gf_log (this->name,
- ((priv->connected == 1) ?
- GF_LOG_WARNING : GF_LOG_DEBUG),
- "reading from socket failed. Error (%s)"
- ", peer (%s)", strerror (errno),
- this->peerinfo.identifier);
- }
+ if (ret == -1)
goto out;
- }
if (ret > 0) {
gf_log (this->name, GF_LOG_TRACE, "partial "
@@ -1951,7 +2082,7 @@ out:
}
-int
+static int
socket_proto_state_machine (rpc_transport_t *this,
rpc_transport_pollin_t **pollin)
{
@@ -1974,17 +2105,22 @@ out:
}
-int
+static int
socket_event_poll_in (rpc_transport_t *this)
{
int ret = -1;
rpc_transport_pollin_t *pollin = NULL;
+ socket_private_t *priv = this->private;
ret = socket_proto_state_machine (this, &pollin);
if (pollin != NULL) {
+ priv->ot_state = OT_CALLBACK;
ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_RECEIVED,
pollin);
+ if (priv->ot_state == OT_CALLBACK) {
+ priv->ot_state = OT_RUNNING;
+ }
rpc_transport_pollin_destroy (pollin);
}
@@ -1992,7 +2128,7 @@ socket_event_poll_in (rpc_transport_t *this)
}
-int
+static int
socket_connect_finish (rpc_transport_t *this)
{
int ret = -1;
@@ -2026,8 +2162,6 @@ socket_connect_finish (rpc_transport_t *this)
priv->connect_finish_log = 1;
}
__socket_disconnect (this);
- notify_rpc = 1;
- event = RPC_TRANSPORT_DISCONNECT;
goto unlock;
}
@@ -2066,7 +2200,7 @@ out:
/* reads rpc_requests during pollin */
-int
+static int
socket_event_handler (int fd, int idx, void *data,
int poll_in, int poll_out, int poll_err)
{
@@ -2111,7 +2245,7 @@ out:
}
-void *
+static void *
socket_poller (void *ctx)
{
rpc_transport_t *this = ctx;
@@ -2119,7 +2253,9 @@ socket_poller (void *ctx)
struct pollfd pfd[2] = {{0,},};
gf_boolean_t to_write = _gf_false;
int ret = 0;
- int orig_gen;
+ uint32_t gen = 0;
+
+ priv->ot_state = OT_RUNNING;
if (priv->use_ssl) {
if (ssl_setup_connection(this,priv->connected) < 0) {
@@ -2139,8 +2275,6 @@ socket_poller (void *ctx)
}
}
- orig_gen = ++(priv->socket_gen);
-
if (priv->connected == 0) {
THIS = this->xl;
ret = socket_connect_finish (this);
@@ -2148,20 +2282,17 @@ socket_poller (void *ctx)
gf_log (this->name, GF_LOG_WARNING,
"asynchronous socket_connect_finish failed");
}
- ret = rpc_transport_notify (this->listener,
- RPC_TRANSPORT_ACCEPT, this);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "asynchronous rpc_transport_notify failed");
- }
}
+ ret = rpc_transport_notify (this->listener,
+ RPC_TRANSPORT_ACCEPT, this);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "asynchronous rpc_transport_notify failed");
+ }
+
+ gen = priv->ot_gen;
for (;;) {
- if (priv->socket_gen != orig_gen) {
- gf_log(this->name,GF_LOG_DEBUG,
- "redundant poller exiting");
- return NULL;
- }
pthread_mutex_lock(&priv->lock);
to_write = !list_empty(&priv->ioq);
pthread_mutex_unlock(&priv->lock);
@@ -2197,6 +2328,13 @@ socket_poller (void *ctx)
else if (errno == ENOTCONN) {
ret = 0;
}
+ if (priv->ot_state == OT_PLEASE_DIE) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "OT_IDLE on %p (input request)",
+ this);
+ priv->ot_state = OT_IDLE;
+ break;
+ }
}
else if (pfd[1].revents & POLL_MASK_OUTPUT) {
ret = socket_event_poll_out(this);
@@ -2207,6 +2345,13 @@ socket_poller (void *ctx)
else if (errno == ENOTCONN) {
ret = 0;
}
+ if (priv->ot_state == OT_PLEASE_DIE) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "OT_IDLE on %p (output request)",
+ this);
+ priv->ot_state = OT_IDLE;
+ break;
+ }
}
else {
/*
@@ -2227,19 +2372,64 @@ socket_poller (void *ctx)
"error in polling loop");
break;
}
+ if (priv->ot_gen != gen) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "generation mismatch, my %u != %u",
+ gen, priv->ot_gen);
+ return NULL;
+ }
}
err:
/* All (and only) I/O errors should come here. */
- __socket_disconnect (this);
- rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
- rpc_transport_unref (this);
+ pthread_mutex_lock(&priv->lock);
+ if (priv->ssl_ssl) {
+ /*
+ * We're always responsible for this part, but only actually
+ * have to do it if we got far enough for ssl_ssl to be valid
+ * (i.e. errors in ssl_setup_connection don't count).
+ */
+ ssl_teardown_connection(priv);
+ }
+ __socket_shutdown(this);
+ close(priv->sock);
+ priv->sock = -1;
+ priv->ot_state = OT_IDLE;
+ pthread_mutex_unlock(&priv->lock);
+ rpc_transport_notify (this->listener, RPC_TRANSPORT_DISCONNECT,
+ this);
+ rpc_transport_unref (this);
return NULL;
}
+static void
+socket_spawn (rpc_transport_t *this)
+{
+ socket_private_t *priv = this->private;
-int
+ switch (priv->ot_state) {
+ case OT_IDLE:
+ case OT_PLEASE_DIE:
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "refusing to start redundant poller");
+ return;
+ }
+
+ priv->ot_gen += 7;
+ priv->ot_state = OT_SPAWNING;
+ gf_log (this->name, GF_LOG_TRACE,
+ "spawning %p with gen %u", this, priv->ot_gen);
+
+ if (gf_thread_create(&priv->thread,NULL,socket_poller,this) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create poll thread");
+ }
+}
+
+static int
socket_server_event_handler (int fd, int idx, void *data,
int poll_in, int poll_out, int poll_err)
{
@@ -2277,7 +2467,7 @@ socket_server_event_handler (int fd, int idx, void *data,
goto unlock;
}
- if (priv->nodelay) {
+ if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) {
ret = __socket_nodelay (new_sock);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
@@ -2287,7 +2477,8 @@ socket_server_event_handler (int fd, int idx, void *data,
}
}
- if (priv->keepalive) {
+ if (priv->keepalive &&
+ new_sockaddr.ss_family != AF_UNIX) {
ret = __socket_keepalive (new_sock,
new_sockaddr.ss_family,
priv->keepaliveintvl,
@@ -2303,6 +2494,15 @@ socket_server_event_handler (int fd, int idx, void *data,
if (!new_trans)
goto unlock;
+ ret = pthread_mutex_init(&new_trans->lock, NULL);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "pthread_mutex_init() failed: %s",
+ strerror (errno));
+ close (new_sock);
+ goto unlock;
+ }
+
new_trans->name = gf_strdup (this->name);
memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
@@ -2374,6 +2574,7 @@ socket_server_event_handler (int fd, int idx, void *data,
* connection.
*/
new_priv->connected = 1;
+ new_priv->is_server = _gf_true;
rpc_transport_ref (new_trans);
if (new_priv->own_thread) {
@@ -2381,12 +2582,7 @@ socket_server_event_handler (int fd, int idx, void *data,
gf_log(this->name,GF_LOG_ERROR,
"could not create pipe");
}
- if (pthread_create(&new_priv->thread,
- NULL, socket_poller,
- new_trans) != 0) {
- gf_log(this->name,GF_LOG_ERROR,
- "could not create poll thread");
- }
+ socket_spawn(new_trans);
}
else {
new_priv->idx =
@@ -2421,7 +2617,7 @@ out:
}
-int
+static int
socket_disconnect (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -2443,7 +2639,7 @@ out:
}
-int
+static int
socket_connect (rpc_transport_t *this, int port)
{
int ret = -1;
@@ -2452,7 +2648,9 @@ socket_connect (rpc_transport_t *this, int port)
socklen_t sockaddr_len = 0;
glusterfs_ctx_t *ctx = NULL;
sa_family_t sa_family = {0, };
+ char *local_addr = NULL;
union gf_sock_union sock_union;
+ struct sockaddr_in *addr = NULL;
GF_VALIDATE_OR_GOTO ("socket", this, err);
GF_VALIDATE_OR_GOTO ("socket", this->private, err);
@@ -2480,6 +2678,10 @@ socket_connect (rpc_transport_t *this, int port)
goto err;
}
+ gf_log (this->name, GF_LOG_TRACE,
+ "connecting %p, state=%u gen=%u sock=%d", this,
+ priv->ot_state, priv->ot_gen, priv->sock);
+
ret = socket_client_get_remote_sockaddr (this, &sock_union.sa,
&sockaddr_len, &sa_family);
if (ret == -1) {
@@ -2549,7 +2751,7 @@ socket_connect (rpc_transport_t *this, int port)
}
}
- if (priv->nodelay) {
+ if (priv->nodelay && (sa_family != AF_UNIX)) {
ret = __socket_nodelay (priv->sock);
if (ret == -1) {
@@ -2559,7 +2761,7 @@ socket_connect (rpc_transport_t *this, int port)
}
}
- if (priv->keepalive) {
+ if (priv->keepalive && sa_family != AF_UNIX) {
ret = __socket_keepalive (priv->sock,
sa_family,
priv->keepaliveintvl,
@@ -2573,6 +2775,15 @@ socket_connect (rpc_transport_t *this, int port)
SA (&this->myinfo.sockaddr)->sa_family =
SA (&this->peerinfo.sockaddr)->sa_family;
+ /* If a source addr is explicitly specified, use it */
+ ret = dict_get_str (this->options,
+ "transport.socket.source-addr",
+ &local_addr);
+ if (!ret && SA (&this->myinfo.sockaddr)->sa_family == AF_INET) {
+ addr = (struct sockaddr_in *)(&this->myinfo.sockaddr);
+ ret = inet_pton (AF_INET, local_addr, &(addr->sin_addr.s_addr));
+ }
+
ret = client_bind (this, SA (&this->myinfo.sockaddr),
&this->myinfo.sockaddr_len, priv->sock);
if (ret == -1) {
@@ -2583,13 +2794,30 @@ socket_connect (rpc_transport_t *this, int port)
goto unlock;
}
+ if (!priv->use_ssl && !priv->bio && !priv->own_thread) {
+ ret = __socket_nonblock (priv->sock);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "NBIO on %d failed (%s)",
+ priv->sock, strerror (errno));
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
+
ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
this->peerinfo.sockaddr_len);
if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
- gf_log (this->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
+ /* For unix path based sockets, the socket path is
+ * cryptic (md5sum of path) and may not be useful for
+ * the user in debugging so log it in DEBUG
+ */
+ gf_log (this->name, ((sa_family == AF_UNIX) ?
+ GF_LOG_DEBUG : GF_LOG_ERROR),
+ "connection attempt on %s failed, (%s)",
+ this->peerinfo.identifier, strerror (errno));
close (priv->sock);
priv->sock = -1;
goto unlock;
@@ -2624,6 +2852,7 @@ socket_connect (rpc_transport_t *this, int port)
* initializing a client connection.
*/
priv->connected = 0;
+ priv->is_server = _gf_false;
rpc_transport_ref (this);
if (priv->own_thread) {
@@ -2632,11 +2861,8 @@ socket_connect (rpc_transport_t *this, int port)
"could not create pipe");
}
- if (pthread_create(&priv->thread,NULL,
- socket_poller, this) != 0) {
- gf_log(this->name,GF_LOG_ERROR,
- "could not create poll thread");
- }
+ this->listener = this;
+ socket_spawn(this);
}
else {
priv->idx = event_register (ctx->event_pool, priv->sock,
@@ -2657,7 +2883,7 @@ err:
}
-int
+static int
socket_listen (rpc_transport_t *this)
{
socket_private_t * priv = NULL;
@@ -2739,7 +2965,7 @@ socket_listen (rpc_transport_t *this)
}
}
- if (priv->nodelay) {
+ if (priv->nodelay && (sa_family != AF_UNIX)) {
ret = __socket_nodelay (priv->sock);
if (ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
@@ -2808,7 +3034,7 @@ out:
}
-int32_t
+static int32_t
socket_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
{
socket_private_t *priv = NULL;
@@ -2882,7 +3108,7 @@ out:
}
-int32_t
+static int32_t
socket_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
{
socket_private_t *priv = NULL;
@@ -2956,7 +3182,7 @@ out:
}
-int32_t
+static int32_t
socket_getpeername (rpc_transport_t *this, char *hostname, int hostlen)
{
int32_t ret = -1;
@@ -2975,7 +3201,7 @@ out:
}
-int32_t
+static int32_t
socket_getpeeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
struct sockaddr_storage *sa, socklen_t salen)
{
@@ -2996,7 +3222,7 @@ out:
}
-int32_t
+static int32_t
socket_getmyname (rpc_transport_t *this, char *hostname, int hostlen)
{
int32_t ret = -1;
@@ -3015,7 +3241,7 @@ out:
}
-int32_t
+static int32_t
socket_getmyaddr (rpc_transport_t *this, char *myaddr, int addrlen,
struct sockaddr_storage *sa, socklen_t salen)
{
@@ -3035,6 +3261,25 @@ out:
}
+static int
+socket_throttle (rpc_transport_t *this, gf_boolean_t onoff)
+{
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ /* The way we implement throttling is by taking off
+ POLLIN event from the polled flags. This way we
+ never get called with the POLLIN event and therefore
+ will never read() any more data until throttling
+ is turned off.
+ */
+ priv->idx = event_select_on (this->ctx->event_pool, priv->sock,
+ priv->idx, (int) !onoff, -1);
+ return 0;
+}
+
+
struct rpc_transport_ops tops = {
.listen = socket_listen,
.connect = socket_connect,
@@ -3045,6 +3290,7 @@ struct rpc_transport_ops tops = {
.get_peeraddr = socket_getpeeraddr,
.get_myname = socket_getmyname,
.get_myaddr = socket_getmyaddr,
+ .throttle = socket_throttle,
};
int
@@ -3101,7 +3347,7 @@ out:
}
-int
+static int
socket_init (rpc_transport_t *this)
{
socket_private_t *priv = NULL;
@@ -3122,6 +3368,7 @@ socket_init (rpc_transport_t *this)
if (!priv) {
return -1;
}
+ memset(priv,0,sizeof(*priv));
pthread_mutex_init (&priv->lock, NULL);
@@ -3347,6 +3594,10 @@ socket_init (rpc_transport_t *this)
SSL_CTX_set_verify(priv->ssl_ctx,SSL_VERIFY_PEER,0);
}
+ if (priv->own_thread) {
+ priv->ot_state = OT_IDLE;
+ }
+
out:
this->private = priv;
return 0;
diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h
index 2c4b44cf4..e0b412fcc 100644
--- a/rpc/rpc-transport/socket/src/socket.h
+++ b/rpc/rpc-transport/socket/src/socket.h
@@ -158,6 +158,8 @@ struct gf_sock_incoming_frag {
sp_rpcfrag_state_t state;
};
+#define GF_SOCKET_RA_MAX 1024
+
struct gf_sock_incoming {
sp_rpcrecord_state_t record_state;
struct gf_sock_incoming_frag frag;
@@ -175,8 +177,21 @@ struct gf_sock_incoming {
char complete_record;
msg_type_t msg_type;
size_t total_bytes_read;
+
+ size_t ra_read;
+ size_t ra_max;
+ size_t ra_served;
+ char *ra_buf;
};
+typedef enum {
+ OT_IDLE, /* Uninitialized or termination complete. */
+ OT_SPAWNING, /* Past pthread_create but not in thread yet. */
+ OT_RUNNING, /* Poller thread running normally. */
+ OT_CALLBACK, /* Poller thread in the middle of a callback. */
+ OT_PLEASE_DIE, /* Poller termination requested. */
+} ot_state_t;
+
typedef struct {
int32_t sock;
int32_t idx;
@@ -215,7 +230,9 @@ typedef struct {
pthread_t thread;
int pipe[2];
gf_boolean_t own_thread;
- volatile int socket_gen;
+ ot_state_t ot_state;
+ uint32_t ot_gen;
+ gf_boolean_t is_server;
} socket_private_t;