diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2014-01-08 17:01:44 +0530 | 
|---|---|---|
| committer | Niels de Vos <ndevos@redhat.com> | 2014-06-27 01:42:03 -0700 | 
| commit | efb5af6c1a66fc6d8bebb1c96e8b39d6fa6f8dcd (patch) | |
| tree | 866d0052f2980376e04f3adbfd1ac0bd80056ab6 /xlators | |
| parent | e672e3dd750eeb39ad1cd2eeb0b7b51920928b50 (diff) | |
protocol/client: conn-id should be unique when lk-heal is off
        Backport of http://review.gluster.org/6669
Problem:
It was observed that in some cases client disconnects
and re-connects before server xlator could detect that a
disconnect happened. So it still uses previous fdtable and ltable.
But it can so happen that in between disconnect and re-connect
an 'unlock' fop may fail because the fds are marked 'bad' in client
xlator upon disconnect. Due to this stale locks remain on the brick
which lead to hangs/self-heals not happening etc.
For the exact bug RCA please look at
https://bugzilla.redhat.com/show_bug.cgi?id=1049932#c0
Fix:
When lk-heal is not enabled make sure connection-id is different for
every setvolume. This will make sure that a previous connection's
resources are not re-used in server xlator.
BUG: 1113894
Change-Id: I5090f832730e4072c4b6b6758e64f757b911bd49
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/8187
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 34 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client.h | 5 | 
2 files changed, 31 insertions, 8 deletions
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index b2aa664227d..7c8be42ede2 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -465,17 +465,23 @@ client_set_lk_version (xlator_t *this)          clnt_conf_t        *conf     = NULL;          call_frame_t       *frame    = NULL;          gf_set_lk_ver_req   req      = {0, }; +        char               *process_uuid = NULL;          GF_VALIDATE_OR_GOTO ("client", this, err);          conf = (clnt_conf_t *) this->private;          req.lk_ver = client_get_lk_ver (conf); -        ret = gf_asprintf (&req.uid, "%s-%s-%d", -                           this->ctx->process_uuid, this->name, -                           this->graph->id); -        if (ret == -1) +        ret = dict_get_str (this->options, "process-uuid", &process_uuid); +        if (!process_uuid) { +                ret = -1;                  goto err; +        } +        req.uid = gf_strdup (process_uuid); +        if (!req.uid) { +                ret = -1; +                goto err; +        }          frame = create_frame (this, this->ctx->pool);          if (!frame) { @@ -1524,6 +1530,7 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc)          char             *process_uuid_xl = NULL;          clnt_conf_t      *conf            = NULL;          dict_t           *options         = NULL; +        char             counter_str[32]  = {0};          options = this->options;          conf    = this->private; @@ -1549,13 +1556,24 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc)                  }          } -        /* With multiple graphs possible in the same process, we need a +        /* When lock-heal is enabled: +         * With multiple graphs possible in the same process, we need a             field to bring the uniqueness. Graph-ID should be enough to get the -           job done +           job done. +         * When lock-heal is disabled, connection-id should always be unique so +         * that server never gets to reuse the previous connection resources +         * so it cleans up the resources on every disconnect. Otherwise +         * it may lead to stale resources, i.e. leaked file desciptors, +         * inode/entry locks          */ -        ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d", +        if (!conf->lk_heal) { +                snprintf (counter_str, sizeof (counter_str), +                          "-%"PRIu64, conf->setvol_count); +                conf->setvol_count++; +        } +        ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d%s",                             this->ctx->process_uuid, this->name, -                           this->graph->id); +                           this->graph->id, counter_str);          if (-1 == ret) {                  gf_log (this->name, GF_LOG_ERROR,                          "asprintf failed while setting process_uuid"); diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index b31e6172149..8c3481f87b7 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -121,6 +121,11 @@ typedef struct clnt_conf {          gf_boolean_t           filter_o_direct; /* if set, filter O_DIRECT from                                                     the flags list of open() */          gf_boolean_t           send_gids; /* let the server resolve gids */ +        /* set volume is the op which results in creating/re-using +         * the conn-id and is called once per connection, this remembers +         * how manytimes set_volume is called +         */ +        uint64_t               setvol_count;  } clnt_conf_t;  typedef struct _client_fd_ctx {  | 
