summaryrefslogtreecommitdiffstats
path: root/xlators/protocol/client/src/client-handshake.c
diff options
context:
space:
mode:
author Pranith Kumar K <pkarampu@redhat.com> 2014-01-08 17:01:44 +0530
committer Vijay Bellur <vbellur@redhat.com> 2014-02-17 08:09:38 -0800
commit 4a14159e82d7b736dec686a170b06e961d7aff53 (patch)
tree 04fbf9da6eedd266798dcdf65e825615d0743f2e /xlators/protocol/client/src/client-handshake.c
parent 7b3207168ef5496702ab10d48be4e4d7cf1f413b (diff)
protocol/client: conn-id should be unique when lk-heal is off
Problem: It was observed that in some cases the client disconnects and re-connects before the server xlator can detect that a disconnect happened, so the server still uses the previous fdtable and ltable. However, between the disconnect and the re-connect an 'unlock' fop may fail because the fds are marked 'bad' in the client xlator upon disconnect. As a result, stale locks remain on the brick, which leads to hangs, self-heals not happening, etc. For the exact bug RCA please look at https://bugzilla.redhat.com/show_bug.cgi?id=1049932#c0 Fix: When lk-heal is not enabled, make sure the connection-id is different for every setvolume. This ensures that a previous connection's resources are not re-used in the server xlator. Change-Id: Id844aaa76dfcf2740db72533bca53c23b2fe5549 BUG: 1049932 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/6669 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/protocol/client/src/client-handshake.c')
-rw-r--r--xlators/protocol/client/src/client-handshake.c34
1 files changed, 26 insertions, 8 deletions
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index b2aa664227d..7c8be42ede2 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -465,17 +465,23 @@ client_set_lk_version (xlator_t *this)
clnt_conf_t *conf = NULL;
call_frame_t *frame = NULL;
gf_set_lk_ver_req req = {0, };
+ char *process_uuid = NULL;
GF_VALIDATE_OR_GOTO ("client", this, err);
conf = (clnt_conf_t *) this->private;
req.lk_ver = client_get_lk_ver (conf);
- ret = gf_asprintf (&req.uid, "%s-%s-%d",
- this->ctx->process_uuid, this->name,
- this->graph->id);
- if (ret == -1)
+ ret = dict_get_str (this->options, "process-uuid", &process_uuid);
+ if (!process_uuid) {
+ ret = -1;
goto err;
+ }
+ req.uid = gf_strdup (process_uuid);
+ if (!req.uid) {
+ ret = -1;
+ goto err;
+ }
frame = create_frame (this, this->ctx->pool);
if (!frame) {
@@ -1524,6 +1530,7 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc)
char *process_uuid_xl = NULL;
clnt_conf_t *conf = NULL;
dict_t *options = NULL;
+ char counter_str[32] = {0};
options = this->options;
conf = this->private;
@@ -1549,13 +1556,24 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc)
}
}
- /* With multiple graphs possible in the same process, we need a
+ /* When lock-heal is enabled:
+ * With multiple graphs possible in the same process, we need a
field to bring the uniqueness. Graph-ID should be enough to get the
- job done
+ job done.
+ * When lock-heal is disabled, connection-id should always be unique so
+ * that server never gets to reuse the previous connection resources
+ * so it cleans up the resources on every disconnect. Otherwise
+ * it may lead to stale resources, i.e. leaked file descriptors,
+ * inode/entry locks
*/
- ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d",
+ if (!conf->lk_heal) {
+ snprintf (counter_str, sizeof (counter_str),
+ "-%"PRIu64, conf->setvol_count);
+ conf->setvol_count++;
+ }
+ ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d%s",
this->ctx->process_uuid, this->name,
- this->graph->id);
+ this->graph->id, counter_str);
if (-1 == ret) {
gf_log (this->name, GF_LOG_ERROR,
"asprintf failed while setting process_uuid");