From 4bbaf4615be673dec70fc3e1f127139b6d079869 Mon Sep 17 00:00:00 2001 From: Emmanuel Dreyfus Date: Sat, 16 Aug 2014 02:32:49 +0200 Subject: Fix glustershd detection on volume restart On NetBSD and FreeBSD, doing a 'gluster volume start $volume force' causes the NFS server, quotad, snapd and glustershd to go undetected by glusterd once the volume has restarted. 'gluster volume status' shows these processes as 'N' in the online column, even though they have been launched successfully. This happens because glusterd attempts to connect to its child processes in the window between the moment the child does an unlink() on the socket in __socket_server_bind() and the time it calls bind() and listen(). A different scheduling policy may explain why the problem does not happen on Linux, but it may pop up there some day, since nothing guarantees the ordering here. This patch works around this by introducing a boolean transport.socket.ignore-enoent option, set by nfs and glustershd, which prevents ENOENT from being fatal and causes glusterd to retry and succeed later. Behavior of other clients is unaffected. 
BUG: 1129939 Change-Id: Ifdc4d45b2513743ed42ee235a5c61a086321644c Signed-off-by: Emmanuel Dreyfus Reviewed-on: http://review.gluster.org/8403 Tested-by: Gluster Build System Reviewed-by: Niels de Vos Reviewed-by: Vijay Bellur --- rpc/rpc-transport/socket/src/socket.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'rpc/rpc-transport') diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index 94b243bab15..bd3f271cf1a 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -2752,6 +2752,7 @@ socket_connect (rpc_transport_t *this, int port) socket_connect_error_state_t *arg = NULL; pthread_t th_id = {0, }; char *cname = NULL; + gf_boolean_t ign_enoent = _gf_false; GF_VALIDATE_OR_GOTO ("socket", this, err); GF_VALIDATE_OR_GOTO ("socket", this->private, err); @@ -2871,6 +2872,10 @@ socket_connect (rpc_transport_t *this, int port) &(addr->sin_addr.s_addr)); } + /* If client wants ENOENT to be ignored */ + ign_enoent = dict_get_str_boolean (this->options, + "transport.socket.ignore-enoent", _gf_false); + ret = client_bind (this, SA (&this->myinfo.sockaddr), &this->myinfo.sockaddr_len, priv->sock); if (ret == -1) { @@ -2892,6 +2897,13 @@ socket_connect (rpc_transport_t *this, int port) ret = connect (priv->sock, SA (&this->peerinfo.sockaddr), this->peerinfo.sockaddr_len); + if (ret == -1 && errno == ENOENT && ign_enoent) { + gf_log (this->name, GF_LOG_WARNING, + "Ignore failed connection attempt on %s, (%s) ", + this->peerinfo.identifier, strerror (errno)); + goto handler; + } + if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) { /* For unix path based sockets, the socket path is * cryptic (md5sum of path) and may not be useful for -- cgit