From f764516c2e526624ce0088963924ff2d88304553 Mon Sep 17 00:00:00 2001 From: Mohammed Junaid Date: Wed, 8 Feb 2012 18:06:39 +0530 Subject: protocol/client,server: fcntl lock self healing. Currently (without this patch), on a disconnect the server cleans up the transport, which in turn closes the fd's and releases the locks acquired on those fd's by that client. On a reconnect, the client just reopens the fd's but doesn't reacquire the locks. The application that had previously acquired the locks is still under the assumption that it is the owner of those locks, which might have been granted to other clients (if they request them) by the server, leading to data corruption. This patch allows the client to reacquire the fcntl locks (held on the fd's) during the client-server handshake. * The server identifies the client via process-uuid-xl (which is a combination of uuid and client-protocol name; it is assumed to be unique) and lk-version number. * The client maintains a list of (process-uuid-xl, lk-version) pairs, one for each accepted connection. On a connect, the server traverses the list for a matching pair; if a matching pair is not found, the server returns lk-version with value 0, else it returns the lk-version it has in store. * On a disconnect, the server and client enter a grace period, and on the completion of the grace period, the client bumps up its lk-version number (which means it will reacquire the locks the next time) and the server will destroy the connection. If reconnection happens within the grace period, the server will find the matching (process-uuid-xl, lk-version) pair in its list, which guarantees that the fd's and their corresponding locks are still valid for this client.
Configurable options: To set grace-timeout, the following options are available: option server.grace-timeout value option client.grace-timeout value To enable or disable lk-heal: option lk-heal [on|off] The gluster volume set command can be used to configure these options. Change-Id: Id677ef1087b300d649f278b8b2aa0d94eae85ed2 BUG: 795386 Signed-off-by: Mohammed Junaid Reviewed-on: http://review.gluster.com/2766 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/protocol/client/src/client3_1-fops.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'xlators/protocol/client/src/client3_1-fops.c') diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c index 76d4fb0d6..4d6d57528 100644 --- a/xlators/protocol/client/src/client3_1-fops.c +++ b/xlators/protocol/client/src/client3_1-fops.c @@ -351,6 +351,7 @@ client3_1_open_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->inode = inode_ref (fd->inode); fdctx->flags = local->flags; fdctx->wbflags = local->wbflags; + fdctx->lk_ctx = fd_lk_ctx_ref (fd->lk_ctx); INIT_LIST_HEAD (&fdctx->sfd_pos); INIT_LIST_HEAD (&fdctx->lock_list); @@ -2279,17 +2280,30 @@ client3_1_releasedir_cbk (struct rpc_req *req, struct iovec *iov, int count, int client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx) { + clnt_conf_t *conf = NULL; call_frame_t *fr = NULL; int32_t ret = -1; + fd_lk_ctx_t *lk_ctx = NULL; if (!fdctx) goto out; + conf = (clnt_conf_t *) this->private; + if (fdctx->remote_fd == -1) { gf_log (this->name, GF_LOG_DEBUG, "not a valid fd"); goto out; } + pthread_mutex_lock (&conf->lock); + { + lk_ctx = fdctx->lk_ctx; + fdctx->lk_ctx = NULL; + } + pthread_mutex_unlock (&conf->lock); + + fd_lk_ctx_unref (lk_ctx); + fr = create_frame (this, this->ctx->pool); if (fdctx->is_dir) { @@ -4466,7 +4480,6 @@ unwind: return 0; } - int32_t client3_1_lk (call_frame_t *frame, xlator_t *this, void *data) @@ -4523,6 +4536,7 @@ client3_1_lk 
(call_frame_t *frame, xlator_t *this, req.cmd = gf_cmd; req.type = gf_type; gf_proto_flock_from_flock (&req.flock, args->flock); + memcpy (req.gfid, args->fd->inode->gfid, 16); ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_LK, -- cgit