| -rw-r--r-- | rpc/rpc-lib/src/rpc-clnt.c | 27 |
| -rwxr-xr-x | tests/bugs/fb4482137.t     | 62 |
| -rw-r--r-- | tests/volume.rc            |  7 |
3 files changed, 94 insertions, 2 deletions
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index fe099f92f60..be18ed9f305 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -122,6 +122,7 @@ call_bail (void *data)
         struct iovec        iov         = {0,};
         char                peerid[UNIX_PATH_MAX] = {0};
         gf_boolean_t        need_unref  = _gf_false;
+        gf_boolean_t        timedout_frames = _gf_false;
 
         GF_VALIDATE_OR_GOTO ("client", data, out);
 
@@ -198,7 +199,6 @@ call_bail (void *data)
                         "--", trav->rpcreq->procnum, trav->rpcreq->xid,
                         frame_sent, conn->frame_timeout, peerid);
 
-                clnt = rpc_clnt_ref (clnt);
                 trav->rpcreq->rpc_status = -1;
                 trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
 
@@ -207,7 +207,30 @@ call_bail (void *data)
                 clnt = rpc_clnt_unref (clnt);
                 list_del_init (&trav->list);
                 mem_put (trav);
-        }
+                timedout_frames = _gf_true;
+        }
+        /* So what on earth is this, you ask?  It was observed while testing
+         * the SHD threading code that, under high loads, SHD/AFR related
+         * SyncOps & SyncTasks can actually hang/deadlock because the transport
+         * disconnected event never gets bubbled up correctly.  Various
+         * tests indicated the ping timeouts worked fine, while "frame timeouts"
+         * did not.  The only difference?  Ping timeouts actually disconnect
+         * the transport while frame timeouts did not.  So from a high level we
+         * know this prevents deadlock, as subsequent tests showed the deadlocks
+         * no longer occurred (after this change).  That said, there may be some
+         * more elegant solution.  For now though, forcing a reconnect is
+         * preferable to hanging clients or deadlocking the SHD.
+         *
+         * I suspect the culprit might be in
+         * afr-self-heal-common.c:afr_sh_common_lookup_cbk, as this function
+         * will early-return if the callcount never actually reaches 0,
+         * which ordinarily is fine (you only want your callback called if
+         * the Nth response is received), but what happens if callcount
+         * never reaches 0?  The callback won't be called.  Theory at this
+         * point, but a good spot to start when we get a chance.
+         */
+        if (timedout_frames)
+                rpc_transport_disconnect (clnt->conn.trans);
 out:
         rpc_clnt_unref (clnt);
         if (need_unref)
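The comment above boils down to one operational difference: a ping timeout tears the transport down (so waiters get a disconnect notification), whereas a frame timeout previously only errored out the stale frames. With this patch a frame timeout now forces the same disconnect. For manual experiments outside the regression test below, both timeouts can be shortened on a disposable volume; this is only a rough sketch using standard gluster volume options (the patch itself touches nothing besides the frame-timeout path, and network.ping-timeout is shown purely for comparison):

#!/bin/bash
# Hypothetical manual tuning on a throwaway volume; the name is arbitrary
# and stands in for $V0 as used in the test below.
V0=testvol
gluster volume set $V0 network.frame-timeout 2    # bail out stale frames quickly (mirrors the test)
gluster volume set $V0 network.ping-timeout 10    # existing option, unchanged by this patch
# Confirm the reconfigured options took effect
gluster volume info $V0 | grep -E 'frame-timeout|ping-timeout'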
diff --git a/tests/bugs/fb4482137.t b/tests/bugs/fb4482137.t
new file mode 100755
index 00000000000..3616ab6022d
--- /dev/null
+++ b/tests/bugs/fb4482137.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+#
+# Test the scenario where an SHD daemon suffers a frame timeout during a
+# crawl. The expected behavior is that the current crawl will continue
+# after the timeout rather than deadlock.
+#
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function wait_for_shd_no_sink() {
+    local TIMEOUT=$1
+    # If we see the "no active sinks" log message we know
+    # the heal is alive. It cannot proceed as the "sink"
+    # is hung, but it's at least alive and trying.
+    timeout $TIMEOUT grep -q 'replicate-0: no active sinks for' \
+        <(tail -fn0 /var/log/glusterfs/glustershd.log)
+    return $?
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info 2> /dev/null;
+
+# Set up a cluster with 3 replicas, and favorite child by majority on
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3};
+TEST $CLI volume set $V0 network.frame-timeout 2
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume set $V0 cluster.heal-timeout 10
+TEST $CLI volume start $V0
+sleep 5
+
+# Mount the volume
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \
+    --attribute-timeout=0 --entry-timeout=0
+
+# Kill brick 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+sleep 1
+
+# Write some data into the mount which will require healing
+cd $M0
+for i in {1..1000}; do
+    dd if=/dev/urandom of=testdata_$i bs=64k count=1 2>/dev/null
+done
+
+# Re-start the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+
+sleep 1
+TEST hang_brick $V0 $H0 $B0/${V0}1
+sleep 4
+TEST wait_for_shd_no_sink 20
+cleanup
diff --git a/tests/volume.rc b/tests/volume.rc
index 5ea75a51d22..f75d8969e94 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -237,6 +237,13 @@ function kill_brick {
         kill -9 $(get_brick_pid $vol $host $brick)
 }
 
+function hang_brick {
+        local vol=$1
+        local host=$2
+        local brick=$3
+        kill -STOP $(get_brick_pid $vol $host $brick)
+}
+
 function check_option_help_presence {
         local option=$1
         $CLI volume set help | grep "^Option:" | grep -w $option
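One note on the new volume.rc helper: hang_brick freezes a brick with SIGSTOP, and nothing in this patch ever resumes it; fb4482137.t simply lets cleanup tear the volume down afterwards. A test that instead wanted to thaw the brick and let the heal complete would need the SIGCONT counterpart. A minimal sketch in the same style as volume.rc; resume_brick is hypothetical and not part of this change:

function resume_brick {
        local vol=$1
        local host=$2
        local brick=$3
        # Undo hang_brick: SIGCONT resumes a process stopped with SIGSTOP
        kill -CONT $(get_brick_pid $vol $host $brick)
}

In a test it would pair with the existing helper, e.g. "TEST hang_brick $V0 $H0 $B0/${V0}1" followed later by "TEST resume_brick $V0 $H0 $B0/${V0}1" once the frame-timeout behavior has been observed.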
