-rw-r--r--   rpc/rpc-lib/src/rpc-clnt.c   | 27
-rwxr-xr-x   tests/bugs/fb4482137.t       | 62
-rw-r--r--   tests/volume.rc              |  7
3 files changed, 94 insertions, 2 deletions
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index fe099f92f60..be18ed9f305 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -122,6 +122,7 @@ call_bail (void *data)
struct iovec iov = {0,};
char peerid[UNIX_PATH_MAX] = {0};
gf_boolean_t need_unref = _gf_false;
+ gf_boolean_t timedout_frames = _gf_false;
GF_VALIDATE_OR_GOTO ("client", data, out);
@@ -198,7 +199,6 @@ call_bail (void *data)
"--",
trav->rpcreq->procnum, trav->rpcreq->xid, frame_sent,
conn->frame_timeout, peerid);
-
clnt = rpc_clnt_ref (clnt);
trav->rpcreq->rpc_status = -1;
trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
@@ -207,7 +207,30 @@ call_bail (void *data)
clnt = rpc_clnt_unref (clnt);
list_del_init (&trav->list);
mem_put (trav);
- }
+ timedout_frames = _gf_true;
+ }
+ /* So what on earth is this, you ask? It was observed while testing
+ * the SHD threading code that under high loads SHD/AFR-related
+ * SyncOps & SyncTasks can actually hang/deadlock because the transport
+ * disconnect event never gets bubbled up correctly. Various
+ * tests indicated the ping timeouts worked fine, while "frame timeouts"
+ * did not. The only difference? Ping timeouts actually disconnect
+ * the transport while frame timeouts do not. So at a high level we
+ * know this prevents the deadlock, as subsequent tests showed the deadlocks
+ * no longer occurred (after this change). That said, there may be a
+ * more elegant solution. For now though, forcing a reconnect is
+ * preferable to hanging clients or deadlocking the SHD.
+ *
+ * I suspect the culprit might be in
+ * afr-self-heal-common.c:afr_sh_common_lookup_cbk as this function
+ * early-returns until the callcount actually reaches 0,
+ * which ordinarily is fine (you only want your callback called once
+ * the Nth response is received), but what happens if callcount
+ * never reaches 0? The callback won't be called. This is only a theory
+ * at this point, but a good spot to start when we get a chance.
+ */
+ if (timedout_frames)
+ rpc_transport_disconnect (clnt->conn.trans);
out:
rpc_clnt_unref (clnt);
if (need_unref)
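
For context on the afr_sh_common_lookup_cbk theory in the comment above: the sketch below is a minimal, hypothetical illustration (fake_lookup_cbk and pending_calls are invented names, not the real AFR code) of the call-count pattern it describes. The waiting self-heal step only resumes when the last reply arrives, so a reply that never comes leaves the task stuck, while failing the outstanding frame (which the forced disconnect guarantees) still drives the count to zero.

    /* Minimal, hypothetical sketch of the call-count pattern described
     * above; fake_lookup_cbk and pending_calls are invented names for
     * illustration, not the real afr_sh_common_lookup_cbk. */
    #include <stdio.h>

    static int pending_calls = 3;   /* one outstanding call per replica child */

    /* Invoked once per child reply; only the final reply resumes the
     * waiting self-heal step. */
    static void fake_lookup_cbk (int child, int op_ret)
    {
            printf ("reply from child %d, op_ret=%d\n", child, op_ret);
            pending_calls--;
            if (pending_calls > 0)
                    return;   /* early return: still waiting on other children */

            printf ("all replies in, self-heal resumes\n");
    }

    int main (void)
    {
            fake_lookup_cbk (0, 0);
            fake_lookup_cbk (1, 0);
            /* If this third reply never arrives (a bailed-out frame whose
             * error callback is never delivered), pending_calls never hits
             * 0 and the resume step never runs: the crawl is stuck.  A
             * forced transport disconnect makes RPC fail the outstanding
             * frame, so the callback still fires (with op_ret == -1) and
             * the count drains. */
            fake_lookup_cbk (2, -1);
            return 0;
    }
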
diff --git a/tests/bugs/fb4482137.t b/tests/bugs/fb4482137.t
new file mode 100755
index 00000000000..3616ab6022d
--- /dev/null
+++ b/tests/bugs/fb4482137.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+#
+# Test the scenario where an SHD daemon suffers a frame timeout during a
+# crawl. The expected behavior is that the present crawl will continue
+# after the timeout and not deadlock.
+#
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function wait_for_shd_no_sink() {
+ local TIMEOUT=$1
+ # If we see the "no active sinks" log message, we know
+ # the heal is alive. It cannot proceed because the "sink"
+ # is hung, but it is at least alive and trying.
+ timeout $TIMEOUT grep -q 'replicate-0: no active sinks for' \
+ <(tail -fn0 /var/log/glusterfs/glustershd.log)
+ return $?
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info 2> /dev/null;
+
+# Set up a cluster with 3 replicas
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3};
+TEST $CLI volume set $V0 network.frame-timeout 2
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume set $V0 cluster.heal-timeout 10
+TEST $CLI volume start $V0
+sleep 5
+
+# Mount the volume
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \
+ --attribute-timeout=0 --entry-timeout=0
+
+# Kill brick 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+sleep 1
+
+# Write some data into the mount which will require healing
+cd $M0
+for i in {1..1000}; do
+ dd if=/dev/urandom of=testdata_$i bs=64k count=1 2>/dev/null
+done
+
+# Re-start the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+
+sleep 1
+TEST hang_brick $V0 $H0 $B0/${V0}1
+sleep 4
+TEST wait_for_shd_no_sink 20
+cleanup
diff --git a/tests/volume.rc b/tests/volume.rc
index 5ea75a51d22..f75d8969e94 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -237,6 +237,13 @@ function kill_brick {
kill -9 $(get_brick_pid $vol $host $brick)
}
+function hang_brick {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ kill -STOP $(get_brick_pid $vol $host $brick)
+}
+
function check_option_help_presence {
local option=$1
$CLI volume set help | grep "^Option:" | grep -w $option