summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaghavendra G <rgowdapp@redhat.com>2017-08-29 15:07:53 +0530
committerRaghavendra G <rgowdapp@redhat.com>2017-09-06 06:20:42 +0000
commitb1b49997574eeb7c6a42e6e8257c81ac8d2d7578 (patch)
treea4ca6fff26df4f5c2d7d93aa4d617ac6ffd1b21e
parente50fc8f4e7eb51386f47bea9e6ca8d8490c09003 (diff)
event/epoll: don't call handler for events received after a pollerr
we register socket with EPOLLONESHOT, which means it has to be explicitly added back through epoll_ctl to receive more events. Normally we do this once the handler completes processing of current event. But event_select_on_epoll is one asynchronous codepath where socket can be added back for polling while an event on the same socket is being processed. event_select_on_epoll has a check whether an event is being processed in the form of slot->in_handler. But this check is not sufficient enough to prevent parallel events as slot->in_handler is not atomically incremented with respect to reception of the event. This means following imaginary sequence of events can happen: * epoll_wait returns with a POLLERR - say POLLERR1 - on a socket (sock1) associated with slot s1. socket_event_handle_pollerr is yet to be invoked. * an event_select_on called from __socket_ioq_churn which was called in request/reply/msg submission codepath (as opposed to __socket_ioq_churn called as part of POLLOUT handling - we cannot receive a POLLOUT due to EPOLLONESHOT) adds back sock1 for polling. * since sock1 was added back for polling in step 2 and our polling is level-triggered, another thread picks up another POLLERR event - say POLLERR2. socket_event_handler is invoked as part of processing POLLERR2 and it completes execution setting priv->sock to -1. * event_unregister_epoll called as part of __socket_reset due to POLLERR1 would receive fd as -1 resulting in assert failure. Also, since the first pollerr event has done rpc_transport_unref, subsequent parallel events (not just pollerr, but other events too) could be acting on a freed up transport too. Change-Id: I5db755068e7890ec755b59f7a35a57da110339eb BUG: 1486134 Signed-off-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-on: https://review.gluster.org/18129 Smoke: Gluster Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: mohammed rafi kc <rkavunga@redhat.com>
-rw-r--r--libglusterfs/src/event-epoll.c23
1 files changed, 17 insertions, 6 deletions
diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
index d32f1dda9d0..7fc53ffeaf6 100644
--- a/libglusterfs/src/event-epoll.c
+++ b/libglusterfs/src/event-epoll.c
@@ -35,6 +35,7 @@ struct event_slot_epoll {
int ref;
int do_close;
int in_handler;
+ int handled_error;
void *data;
event_handler_t handler;
gf_lock_t lock;
@@ -159,6 +160,8 @@ __event_slot_dealloc (struct event_pool *event_pool, int idx)
slot->gen++;
slot->fd = -1;
+ slot->handled_error = 0;
+ slot->in_handler = 0;
event_pool->slots_used[table_idx]--;
return;
@@ -527,6 +530,7 @@ event_dispatch_epoll_handler (struct event_pool *event_pool,
int gen = -1;
int ret = -1;
int fd = -1;
+ gf_boolean_t handled_error_previously = _gf_false;
ev_data = (void *)&event->data;
handler = NULL;
@@ -561,7 +565,13 @@ event_dispatch_epoll_handler (struct event_pool *event_pool,
handler = slot->handler;
data = slot->data;
- slot->in_handler++;
+ if (slot->handled_error) {
+ handled_error_previously = _gf_true;
+ } else {
+ slot->handled_error = (event->events
+ & (EPOLLERR|EPOLLHUP));
+ slot->in_handler++;
+ }
}
pre_unlock:
UNLOCK (&slot->lock);
@@ -569,11 +579,12 @@ pre_unlock:
if (!handler)
goto out;
- ret = handler (fd, idx, gen, data,
- (event->events & (EPOLLIN|EPOLLPRI)),
- (event->events & (EPOLLOUT)),
- (event->events & (EPOLLERR|EPOLLHUP)));
-
+ if (!handled_error_previously) {
+ ret = handler (fd, idx, gen, data,
+ (event->events & (EPOLLIN|EPOLLPRI)),
+ (event->events & (EPOLLOUT)),
+ (event->events & (EPOLLERR|EPOLLHUP)));
+ }
out:
event_slot_unref (event_pool, slot, idx);