summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jeff Darcy <jdarcy@fb.com> 2017-07-14 17:40:45 -0700
committer Shreyas Siravara <sshreyas@fb.com> 2017-09-09 16:54:52 +0000
commit 4d8268d7602979795972d976dab4959d5e0db55b (patch)
tree dd0d1b8712559e94abc070369f1f1985ace90fc8
parent 898c491328b804db4ea588348c8196347a131ffe (diff)
io-threads: nuke everything from a client when it disconnects
Summary: These requests haven't been issued, let alone acknowledged. They would disappear if we crashed, which to the client is indistinguishable from any other kind of disconnection - if indeed the client itself isn't the one that died. So we're completely within our rights to discard these. There are strong hints that such "orphan" requests are part of how we get into the lock-revocation hangs we've been seeing for a while. Even if that theory doesn't pan out, there's no good reason to keep them around clogging up queues and so forth. This is a port of D5430057 & D5662545 to 3.8 Change-Id: Ie4c88f7791aac85540631f60f5c639497468ad76 Reviewed-on: https://review.gluster.org/18254 Reviewed-by: Shreyas Siravara <sshreyas@fb.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r-- libglusterfs/src/call-stub.h 1
-rw-r--r-- xlators/performance/io-threads/src/io-threads.c 41
2 files changed, 39 insertions, 3 deletions
diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h
index 5779ee36e58..67a5a650ca5 100644
--- a/libglusterfs/src/call-stub.h
+++ b/libglusterfs/src/call-stub.h
@@ -22,6 +22,7 @@ typedef struct _call_stub {
char wind;
call_frame_t *frame;
glusterfs_fop_t fop;
+ gf_boolean_t poison;
struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
uint32_t jnl_meta_len;
uint32_t jnl_data_len;
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 97d2dea1b71..09ae02bf2ac 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -220,8 +220,15 @@ iot_worker (void *data)
}
pthread_mutex_unlock (&conf->mutex);
- if (stub) /* guard against spurious wakeups */
- call_resume (stub);
+ if (stub) { /* guard against spurious wakeups */
+ if (stub->poison) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Dropping poisoned request %p.", stub);
+ call_stub_destroy (stub);
+ } else {
+ call_resume (stub);
+ }
+ }
stub = NULL;
if (bye)
@@ -1306,6 +1313,32 @@ fini (xlator_t *this)
return;
}
+static int
+iot_disconnect_cbk (xlator_t *this, client_t *client)
+{
+ int i;
+ call_stub_t *curr;
+ call_stub_t *next;
+ iot_conf_t *conf = this->private;
+
+ pthread_mutex_lock (&conf->mutex);
+ for (i = 0; i < IOT_PRI_MAX; i++) {
+ list_for_each_entry_safe (curr, next, &conf->reqs[i], list) {
+ if (curr->frame->root->client != client) {
+ continue;
+ }
+ gf_log (this->name, GF_LOG_INFO,
+ "poisoning %s fop at %p for client %s",
+ gf_fop_list[curr->fop], curr,
+ client->client_uid);
+ curr->poison = _gf_true;
+ }
+ }
+ pthread_mutex_unlock (&conf->mutex);
+
+ return 0;
+}
+
struct xlator_dumpops dumpops = {
.priv = iot_priv_dump,
};
@@ -1357,7 +1390,9 @@ struct xlator_fops fops = {
.zerofill = iot_zerofill,
};
-struct xlator_cbks cbks;
+struct xlator_cbks cbks = {
+ .client_disconnect = iot_disconnect_cbk,
+};
struct volume_options options[] = {
{ .key = {"thread-count"},