diff options
author | Atin Mukherjee <amukherj@redhat.com> | 2016-09-08 11:33:59 +0530 |
---|---|---|
committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2016-09-19 06:51:09 -0700 |
commit | 12ae3c448c8435decdff27643d5785698ac19dff (patch) | |
tree | b95c96a17448039066c7399ebc7859f306016296 | |
parent | e9b39527d5dcfba95c4c52a522c8ce1f4512ac21 (diff) |
socket: pollerr event shouldn't trigger socket_connect_finish
If connect fails with any error other than EINPROGRESS, we cannot get
the error status using getsockopt (... SO_ERROR ...). Hence we need
to remember the state of the connect and take appropriate action for
it in socket_event_handler.
As an added note, an event can arrive where poll_err is HUP and poll_in
is set as well (i.e., some status was written to the socket). In such
cases we need to finish the connect, process the data, and then handle
the poll_err, as the current code already does.
Special thanks to Kaushal M & Raghavendra G for figuring out the issue.
Change-Id: Ic45ad59ff8ab1d0a9d2cab2c924ad940b9d38528
BUG: 1372356
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Signed-off-by: Shyam <srangana@redhat.com>
Reviewed-on: http://review.gluster.org/15440
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r-- | rpc/rpc-transport/socket/src/socket.c | 42 | ||||
-rw-r--r-- | rpc/rpc-transport/socket/src/socket.h | 3 | ||||
-rw-r--r-- | tests/bugs/changelog/bug-1211327.t | 8 | ||||
-rw-r--r-- | tests/bugs/ec/bug-1236065.t | 4 |
4 files changed, 49 insertions, 8 deletions
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index ad578dd740c..11d029a2659 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -2344,6 +2344,7 @@ out: return ret; } +static int socket_disconnect (rpc_transport_t *this); /* reads rpc_requests during pollin */ static int @@ -2368,7 +2369,23 @@ socket_event_handler (int fd, int idx, void *data, } pthread_mutex_unlock (&priv->lock); - ret = (priv->connected == 1) ? 0 : socket_connect_finish(this); + if (priv->connected != 1) { + if (priv->connect_failed) { + /* connect failed with some other error than + EINPROGRESS or ENOENT, so nothing more to do, fail + reading/writing anything even if poll_in or poll_out + is set */ + ret = socket_disconnect (this); + + /* Force ret to be -1, as we are officially done with + this socket */ + ret = -1; + } else { + ret = socket_connect_finish (this); + } + } else { + ret = 0; + } if (!ret && poll_out) { ret = socket_event_poll_out (this); @@ -3044,6 +3061,16 @@ socket_connect (rpc_transport_t *this, int port) gf_log (this->name, GF_LOG_WARNING, "Ignore failed connection attempt on %s, (%s) ", this->peerinfo.identifier, strerror (errno)); + + /* connect failed with some other error than EINPROGRESS + so, getsockopt (... SO_ERROR ...), will not catch any + errors and return them to us, we need to remember this + state, and take actions in socket_event_handler + appropriately */ + /* TBD: What about ENOENT, we will do getsockopt there + as well, so how is that exempt from such a problem? */ + priv->connect_failed = 1; + goto handler; } @@ -3056,9 +3083,22 @@ socket_connect (rpc_transport_t *this, int port) GF_LOG_DEBUG : GF_LOG_ERROR), "connection attempt on %s failed, (%s)", this->peerinfo.identifier, strerror (errno)); + + /* connect failed with some other error than EINPROGRESS + so, getsockopt (... 
SO_ERROR ...), will not catch any + errors and return them to us, we need to remember this + state, and take actions in socket_event_handler + appropriately */ + /* TBD: What about ENOENT, we will do getsockopt there + as well, so how is that exempt from such a problem? */ + priv->connect_failed = 1; + goto handler; } else { + /* reset connect_failed so that any previous attempts + state is not carried forward */ + priv->connect_failed = 0; ret = 0; } diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h index 8395fd2fa58..7c7005b59e7 100644 --- a/rpc/rpc-transport/socket/src/socket.h +++ b/rpc/rpc-transport/socket/src/socket.h @@ -200,6 +200,9 @@ typedef struct { int32_t idx; /* -1 = not connected. 0 = in progress. 1 = connected */ char connected; + /* 1 = connect failed for reasons other than EINPROGRESS/ENOENT + see socket_connect for details */ + char connect_failed; char bio; char connect_finish_log; char submit_log; diff --git a/tests/bugs/changelog/bug-1211327.t b/tests/bugs/changelog/bug-1211327.t index 19d6e76ecab..a849ec3981f 100644 --- a/tests/bugs/changelog/bug-1211327.t +++ b/tests/bugs/changelog/bug-1211327.t @@ -27,15 +27,13 @@ TEST $CLI volume set $V0 changelog.changelog on; sleep 1 TEST killall_gluster; -sleep 1 -EXPECT 0 online_brick_count; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" online_brick_count TEST glusterd; TEST pidof glusterd; -##Let the brick processes starts -sleep 1; -EXPECT 1 online_brick_count; +##Let the brick processes starts +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ##On brick restart only one HTIME should be found. 
EXPECT 1 count_htime_files; diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t index 35e4e3d1cca..9395aa33e8c 100644 --- a/tests/bugs/ec/bug-1236065.t +++ b/tests/bugs/ec/bug-1236065.t @@ -48,7 +48,7 @@ TEST ec_test_make ## step 4 TEST $CLI volume start $V0 force -EXPECT '7' online_brick_count +EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" online_brick_count # active heal EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status @@ -70,7 +70,7 @@ EXPECT '5' online_brick_count ## step 6 TEST $CLI volume start $V0 force -EXPECT '7' online_brick_count +EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" online_brick_count # self-healing EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status |