summaryrefslogtreecommitdiffstats
path: root/rpc
diff options
context:
space:
mode:
authorEmmanuel Dreyfus <manu@netbsd.org>2014-08-16 02:32:49 +0200
committerVijay Bellur <vbellur@redhat.com>2014-08-25 23:06:34 -0700
commit4bbaf4615be673dec70fc3e1f127139b6d079869 (patch)
treeef161d12f290e0ba2bf88fe3e77484bd368c19e1 /rpc
parentc2a0e3f722583731599785d16f12be5b3472934b (diff)
Fix glustershd detection on volume restart
On NetBSD and FreeBSD, doing a 'gluster volume start $volume force' causes the NFS server, quotad, snapd and glustershd to go undetected by glusterd once the volume has restarted. 'gluster volume status' shows these processes as 'N' in the online column, even though they have been launched successfully. This happens because glusterd attempts to connect to its child processes in the window between the time the child calls unlink() on the socket in __socket_server_bind() and the time it calls bind() and listen(). A different scheduling policy may explain why the problem does not happen on Linux, but it may show up there some day since nothing guarantees the ordering here. This patchset works around the problem by introducing a boolean transport.socket.ignore-enoent option, set by nfs and glustershd, which prevents ENOENT from being fatal and causes glusterd to retry and succeed later. The behavior of other clients is unaffected. BUG: 1129939 Change-Id: Ifdc4d45b2513743ed42ee235a5c61a086321644c Signed-off-by: Emmanuel Dreyfus <manu@netbsd.org> Reviewed-on: http://review.gluster.org/8403 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Niels de Vos <ndevos@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'rpc')
-rw-r--r--rpc/rpc-transport/socket/src/socket.c12
1 files changed, 12 insertions, 0 deletions
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 94b243bab15..bd3f271cf1a 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -2752,6 +2752,7 @@ socket_connect (rpc_transport_t *this, int port)
socket_connect_error_state_t *arg = NULL;
pthread_t th_id = {0, };
char *cname = NULL;
+ gf_boolean_t ign_enoent = _gf_false;
GF_VALIDATE_OR_GOTO ("socket", this, err);
GF_VALIDATE_OR_GOTO ("socket", this->private, err);
@@ -2871,6 +2872,10 @@ socket_connect (rpc_transport_t *this, int port)
&(addr->sin_addr.s_addr));
}
+ /* If client wants ENOENT to be ignored */
+ ign_enoent = dict_get_str_boolean (this->options,
+ "transport.socket.ignore-enoent", _gf_false);
+
ret = client_bind (this, SA (&this->myinfo.sockaddr),
&this->myinfo.sockaddr_len, priv->sock);
if (ret == -1) {
@@ -2892,6 +2897,13 @@ socket_connect (rpc_transport_t *this, int port)
ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
this->peerinfo.sockaddr_len);
+ if (ret == -1 && errno == ENOENT && ign_enoent) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Ignore failed connection attempt on %s, (%s) ",
+ this->peerinfo.identifier, strerror (errno));
+ goto handler;
+ }
+
if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
/* For unix path based sockets, the socket path is
* cryptic (md5sum of path) and may not be useful for