diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2014-01-08 17:01:44 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-02-17 08:09:38 -0800 |
commit | 4a14159e82d7b736dec686a170b06e961d7aff53 (patch) | |
tree | 04fbf9da6eedd266798dcdf65e825615d0743f2e /xlators/protocol/client/src/client-handshake.c | |
parent | 7b3207168ef5496702ab10d48be4e4d7cf1f413b (diff) |
protocol/client: conn-id should be unique when lk-heal is off
Problem:
It was observed that in some cases the client disconnects
and re-connects before the server xlator can detect that a
disconnect happened, so the server still uses the previous fdtable and ltable.
However, in between the disconnect and the re-connect,
an 'unlock' fop may fail because the fds are marked 'bad' in the client
xlator upon disconnect. As a result, stale locks remain on the brick,
which leads to hangs, self-heals not happening, etc.
For the exact bug RCA please look at
https://bugzilla.redhat.com/show_bug.cgi?id=1049932#c0
Fix:
When lk-heal is not enabled, make sure the connection-id is different for
every setvolume. This ensures that a previous connection's
resources are not re-used in the server xlator.
Change-Id: Id844aaa76dfcf2740db72533bca53c23b2fe5549
BUG: 1049932
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/6669
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/protocol/client/src/client-handshake.c')
-rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index b2aa664227d..7c8be42ede2 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -465,17 +465,23 @@ client_set_lk_version (xlator_t *this) clnt_conf_t *conf = NULL; call_frame_t *frame = NULL; gf_set_lk_ver_req req = {0, }; + char *process_uuid = NULL; GF_VALIDATE_OR_GOTO ("client", this, err); conf = (clnt_conf_t *) this->private; req.lk_ver = client_get_lk_ver (conf); - ret = gf_asprintf (&req.uid, "%s-%s-%d", - this->ctx->process_uuid, this->name, - this->graph->id); - if (ret == -1) + ret = dict_get_str (this->options, "process-uuid", &process_uuid); + if (!process_uuid) { + ret = -1; goto err; + } + req.uid = gf_strdup (process_uuid); + if (!req.uid) { + ret = -1; + goto err; + } frame = create_frame (this, this->ctx->pool); if (!frame) { @@ -1524,6 +1530,7 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc) char *process_uuid_xl = NULL; clnt_conf_t *conf = NULL; dict_t *options = NULL; + char counter_str[32] = {0}; options = this->options; conf = this->private; @@ -1549,13 +1556,24 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc) } } - /* With multiple graphs possible in the same process, we need a + /* When lock-heal is enabled: + * With multiple graphs possible in the same process, we need a field to bring the uniqueness. Graph-ID should be enough to get the - job done + job done. + * When lock-heal is disabled, connection-id should always be unique so + * that server never gets to reuse the previous connection resources + * so it cleans up the resources on every disconnect. Otherwise + * it may lead to stale resources, i.e. 
leaked file desciptors, + * inode/entry locks */ - ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d", + if (!conf->lk_heal) { + snprintf (counter_str, sizeof (counter_str), + "-%"PRIu64, conf->setvol_count); + conf->setvol_count++; + } + ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d%s", this->ctx->process_uuid, this->name, - this->graph->id); + this->graph->id, counter_str); if (-1 == ret) { gf_log (this->name, GF_LOG_ERROR, "asprintf failed while setting process_uuid"); |