diff options
25 files changed, 1484 insertions, 56 deletions
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 34543f622..bbe7a2cd7 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -23,7 +23,7 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \ $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c \ $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c \ $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c \ - graph-print.c trie.c run.c options.c + graph-print.c trie.c run.c options.c fd-lk.c nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c @@ -37,7 +37,7 @@ noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h \ rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h \ $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \ $(CONTRIB_BUILDDIR)/uuid/uuid_types.h syncop.h graph-utils.h trie.h run.h \ - options.h lkowner.h + options.h lkowner.h fd-lk.h EXTRA_DIST = graph.l graph.y diff --git a/libglusterfs/src/fd-lk.c b/libglusterfs/src/fd-lk.c new file mode 100644 index 000000000..8df43bb60 --- /dev/null +++ b/libglusterfs/src/fd-lk.c @@ -0,0 +1,458 @@ +/* + Copyright (c) 2011-2012 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include "fd-lk.h" +#include "common-utils.h" + + +int32_t +_fd_lk_delete_lock (fd_lk_ctx_node_t *lock) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("fd-lk", lock, out); + + list_del_init (&lock->next); + + ret = 0; +out: + return ret; +} + +int32_t +_fd_lk_destroy_lock (fd_lk_ctx_node_t *lock) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("fd-lk", lock, out); + + GF_FREE (lock); + + ret = 0; +out: + return ret; +} + +int +_fd_lk_destroy_lock_list (fd_lk_ctx_t *lk_ctx) +{ + int ret = -1; + fd_lk_ctx_node_t *lk = NULL; + fd_lk_ctx_node_t *tmp = NULL; + + GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, out); + + list_for_each_entry_safe (lk, tmp, &lk_ctx->lk_list, next) { + _fd_lk_delete_lock (lk); + _fd_lk_destroy_lock (lk); + } + ret = 0; +out: + return ret; +} + +int +fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx) +{ + int ref = -1; + + GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, err); + + LOCK (&lk_ctx->lock); + { + ref = --lk_ctx->ref; + if (ref < 0) + GF_ASSERT (!ref); + if (ref == 0) + _fd_lk_destroy_lock_list (lk_ctx); + } + UNLOCK (&lk_ctx->lock); + + if (ref == 0) { + LOCK_DESTROY (&lk_ctx->lock); + GF_FREE (lk_ctx); + } + + return 0; +err: + return -1; +} + +fd_lk_ctx_t * +_fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx) +{ + if (!lk_ctx) { + gf_log_callingfn ("fd", GF_LOG_WARNING, + "invalid argument"); + return NULL; + } + + ++lk_ctx->ref; + + return lk_ctx; +} + +fd_lk_ctx_t * +fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx) +{ + fd_lk_ctx_t *new_lk_ctx = NULL; + + if (!lk_ctx) { + gf_log_callingfn ("fd", GF_LOG_WARNING, + "invalid argument"); + return NULL; + } + + LOCK (&lk_ctx->lock); + { + new_lk_ctx = _fd_lk_ctx_ref (lk_ctx); + } + UNLOCK (&lk_ctx->lock); + + return new_lk_ctx; +} + +fd_lk_ctx_t * +fd_lk_ctx_create () +{ + fd_lk_ctx_t *fd_lk_ctx = NULL; + + fd_lk_ctx = GF_CALLOC (1, sizeof (fd_lk_ctx_t), + gf_common_mt_fd_lk_ctx_t); + if (!fd_lk_ctx) + goto out; + + INIT_LIST_HEAD (&fd_lk_ctx->lk_list); + + LOCK_INIT (&fd_lk_ctx->lock); + + fd_lk_ctx = fd_lk_ctx_ref (fd_lk_ctx); +out: + return fd_lk_ctx; +} + +int +_fd_lk_insert_lock (fd_lk_ctx_t *lk_ctx, + fd_lk_ctx_node_t *lock) +{ + list_add_tail (&lock->next, &lk_ctx->lk_list); + return 0; +} + +static off_t +_fd_lk_get_lock_len (off_t start, off_t end) +{ + if (end == LLONG_MAX) + return 0; + else + return (end - start + 1); +} + +fd_lk_ctx_node_t * +fd_lk_ctx_node_new (int32_t cmd, struct gf_flock *flock) +{ + fd_lk_ctx_node_t *new_lock = NULL; + + /* TODO: get from mem-pool */ + new_lock = GF_CALLOC (1, sizeof (fd_lk_ctx_node_t), + gf_common_mt_fd_lk_ctx_node_t); + if (!new_lock) + goto out; + + new_lock->cmd = cmd; + + if (flock) { + new_lock->fl_type = flock->l_type; + new_lock->fl_start = flock->l_start; + + if (flock->l_len == 0) + new_lock->fl_end = LLONG_MAX; + else + new_lock->fl_end = flock->l_start + flock->l_len - 1; + + memcpy (&new_lock->user_flock, flock, + sizeof (struct gf_flock)); + } + + INIT_LIST_HEAD (&new_lock->next); +out: + return new_lock; +} + +int32_t +_fd_lk_delete_unlck_locks (fd_lk_ctx_t *lk_ctx) +{ + int32_t ret = -1; + fd_lk_ctx_node_t *tmp = NULL; + fd_lk_ctx_node_t *lk = NULL; + + GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, out); + + list_for_each_entry_safe (lk, tmp, &lk_ctx->lk_list, next) { + if (lk->fl_type == F_UNLCK) { + _fd_lk_delete_lock (lk); + _fd_lk_destroy_lock (lk); + } + } +out: + return ret; +} + +int +fd_lk_overlap (fd_lk_ctx_node_t *l1, + fd_lk_ctx_node_t *l2) +{ + if (l1->fl_end >= l2->fl_start && + l2->fl_end >= l1->fl_start) + return 1; + + return 0; +} + +fd_lk_ctx_node_t * +_fd_lk_add_locks (fd_lk_ctx_node_t *l1, + fd_lk_ctx_node_t *l2) +{ + fd_lk_ctx_node_t *sum = NULL; + + sum = fd_lk_ctx_node_new (0, NULL); + if (!sum) + goto out; + + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); + + sum->user_flock.l_start = sum->fl_start; + sum->user_flock.l_len = _fd_lk_get_lock_len (sum->fl_start, + sum->fl_end); +out: + return sum; +} + +/* Subtract two locks */ +struct _values { + fd_lk_ctx_node_t *locks[3]; +}; + +int32_t +_fd_lk_sub_locks (struct _values *v, + fd_lk_ctx_node_t *big, + fd_lk_ctx_node_t *small) +{ + int32_t ret = -1; + + if ((big->fl_start == small->fl_start) && + (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL); + if (!v->locks[0]) + goto out; + + memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t)); + + v->locks[0]->fl_type = small->fl_type; + v->locks[0]->user_flock.l_type = small->fl_type; + } else if ((small->fl_start > big->fl_start) && + (small->fl_end < big->fl_end)) { + /* small lock is completely inside big lock, + break it down into 3 different locks. */ + v->locks[0] = fd_lk_ctx_node_new (big->cmd, NULL); + if (!v->locks[0]) + goto out; + + v->locks[1] = fd_lk_ctx_node_new (small->cmd, NULL); + if (!v->locks[1]) + goto out; + + v->locks[2] = fd_lk_ctx_node_new (big->cmd, NULL); + if (!v->locks[2]) + goto out; + + memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t)); + v->locks[0]->fl_end = small->fl_start - 1; + v->locks[0]->user_flock.l_len = + _fd_lk_get_lock_len (v->locks[0]->fl_start, + v->locks[0]->fl_end); + + memcpy (v->locks[1], small, sizeof (fd_lk_ctx_node_t)); + + memcpy (v->locks[2], big, sizeof (fd_lk_ctx_node_t)); + v->locks[2]->fl_start = small->fl_end + 1; + v->locks[2]->user_flock.l_len = + _fd_lk_get_lock_len (v->locks[2]->fl_start, + v->locks[2]->fl_end); + } else if (small->fl_start == big->fl_start) { + /* One of the ends co-incide, break the + locks into two seperate parts */ + v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL); + if (!v->locks[0]) + goto out; + + v->locks[1] = fd_lk_ctx_node_new (big->cmd, NULL); + if (!v->locks[1]) + goto out; + + memcpy (v->locks[0], small, sizeof (fd_lk_ctx_node_t)); + + memcpy (v->locks[1], big, sizeof (fd_lk_ctx_node_t)); + v->locks[1]->fl_start = small->fl_end + 1; + v->locks[1]->user_flock.l_start = small->fl_end + 1; + } else if (small->fl_end == big->fl_end) { + /* One of the ends co-incide, break the + locks into two seperate parts */ + v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL); + if (!v->locks[0]) + goto out; + + v->locks[1] = fd_lk_ctx_node_new (big->cmd, NULL); + if (!v->locks[1]) + goto out; + + memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t)); + v->locks[0]->fl_end = small->fl_start - 1; + v->locks[0]->user_flock.l_len = + _fd_lk_get_lock_len (v->locks[0]->fl_start, + v->locks[0]->fl_end); + + memcpy (v->locks[1], small, sizeof (fd_lk_ctx_node_t)); + } else { + /* We should never come to this case */ + GF_ASSERT (!"Invalid case"); + } + ret = 0; +out: + return ret; +} + +static void +_fd_lk_insert_and_merge (fd_lk_ctx_t *lk_ctx, + fd_lk_ctx_node_t *lock) +{ + int32_t ret = -1; + int32_t i = 0; + fd_lk_ctx_node_t *entry = NULL; + fd_lk_ctx_node_t *t = NULL; + fd_lk_ctx_node_t *sum = NULL; + struct _values v = {.locks = {0, 0, 0 }}; + + list_for_each_entry_safe (entry, t, &lk_ctx->lk_list, next) { + if (!fd_lk_overlap (entry, lock)) + continue; + + if (entry->fl_type == lock->fl_type) { + sum = _fd_lk_add_locks (entry, lock); + if (sum) + return; + sum->fl_type = entry->fl_type; + sum->user_flock.l_type = entry->fl_type; + _fd_lk_delete_lock (entry); + _fd_lk_destroy_lock (entry); + _fd_lk_destroy_lock (lock); + _fd_lk_insert_and_merge (lk_ctx, sum); + return; + } else { + sum = _fd_lk_add_locks (entry, lock); + sum->fl_type = entry->fl_type; + sum->user_flock.l_type = entry->fl_type; + ret = _fd_lk_sub_locks (&v, sum, lock); + if (ret) + return; + _fd_lk_delete_lock (entry); + _fd_lk_destroy_lock (entry); + + _fd_lk_delete_lock (lock); + _fd_lk_destroy_lock (lock); + + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; + + INIT_LIST_HEAD (&v.locks[i]->next); + _fd_lk_insert_and_merge (lk_ctx, v.locks[i]); + } + _fd_lk_delete_unlck_locks (lk_ctx); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + _fd_lk_insert_lock (lk_ctx, lock); + } else { + _fd_lk_destroy_lock_list (lk_ctx); + } +} + +static void +print_lock_list (fd_lk_ctx_t *lk_ctx) +{ + fd_lk_ctx_node_t *lk = NULL; + + gf_log ("fd-lk", GF_LOG_WARNING, "lock list:"); + + list_for_each_entry (lk, &lk_ctx->lk_list, next) + gf_log ("fd-lk", GF_LOG_DEBUG, "owner = %s, " + "cmd = %s fl_type = %s, fs_start = %"PRId64", " + "fs_end = %"PRId64", user_flock: l_type = %s, " + "l_start = %"PRId64", l_len = %"PRId64", ", + lkowner_utoa (&lk->user_flock.l_owner), + get_lk_cmd (lk->cmd), get_lk_type (lk->fl_type), + lk->fl_start, lk->fl_end, + get_lk_type (lk->user_flock.l_type), + lk->user_flock.l_start, + lk->user_flock.l_len); +} + +int +fd_lk_insert_and_merge (fd_t *fd, int32_t cmd, + struct gf_flock *flock) +{ + int32_t ret = -1; + fd_lk_ctx_t *lk_ctx = NULL; + fd_lk_ctx_node_t *lk = NULL; + + GF_VALIDATE_OR_GOTO ("fd-lk", fd, out); + GF_VALIDATE_OR_GOTO ("fd-lk", flock, out); + + lk_ctx = fd_lk_ctx_ref (fd->lk_ctx); + lk = fd_lk_ctx_node_new (cmd, flock); + + gf_log ("fd-lk", GF_LOG_DEBUG, + "new lock requrest: owner = %s, fl_type = %s, " + "fs_start = %"PRId64", fs_end = %"PRId64", " + "user_flock: l_type = %s, l_start = %"PRId64", " + "l_len = %"PRId64, lkowner_utoa (&flock->l_owner), + get_lk_type (lk->fl_type), lk->fl_start, + lk->fl_end, get_lk_type (lk->user_flock.l_type), + lk->user_flock.l_start, + lk->user_flock.l_len); + + LOCK (&lk_ctx->lock); + { + _fd_lk_insert_and_merge (lk_ctx, lk); + print_lock_list (lk_ctx); + } + UNLOCK (&lk_ctx->lock); + + fd_lk_ctx_unref (lk_ctx); + + ret = 0; +out: + return ret; +} diff --git a/libglusterfs/src/fd-lk.h b/libglusterfs/src/fd-lk.h new file mode 100644 index 000000000..3e419e143 --- /dev/null +++ b/libglusterfs/src/fd-lk.h @@ -0,0 +1,72 @@ +/* + Copyright (c) 2011-2012 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _FD_LK_H +#define _FD_LK_H + +#include "fd.h" +#include "locking.h" +#include "list.h" +#include "logging.h" +#include "mem-pool.h" +#include "mem-types.h" +#include "glusterfs.h" + +#define get_lk_type(type) \ + type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? "F_RDLCK" : "F_WRLCK") + +#define get_lk_cmd(cmd) \ + cmd == F_SETLKW ? "F_SETLKW" : (cmd == F_SETLK ? "F_SETLK" : "F_GETLK") + +struct _fd; + +struct fd_lk_ctx { + struct list_head lk_list; + int ref; + gf_lock_t lock; +}; +typedef struct fd_lk_ctx fd_lk_ctx_t; + +struct fd_lk_ctx_node { + int32_t cmd; + struct gf_flock user_flock; + off_t fl_start; + off_t fl_end; + short fl_type; + struct list_head next; +}; +typedef struct fd_lk_ctx_node fd_lk_ctx_node_t; + +fd_lk_ctx_t * +_fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx); + +fd_lk_ctx_t * +fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx); + +fd_lk_ctx_t * +fd_lk_ctx_create (); + +int +fd_lk_insert_and_merge (struct _fd *lk_ctx, int32_t cmd, + struct gf_flock *flock); + +int +fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx); + +#endif /* _FD_LK_H */ diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c index 62a70c457..d4cc3464a 100644 --- a/libglusterfs/src/fd.c +++ b/libglusterfs/src/fd.c @@ -429,6 +429,7 @@ fd_destroy (fd_t *fd) GF_FREE (fd->_ctx); inode_unref (fd->inode); fd->inode = (inode_t *)0xaaaaaaaa; + fd_lk_ctx_unref (fd->lk_ctx); mem_put (fd); out: return; @@ -505,11 +506,12 @@ __fd_create (inode_t *inode, pid_t pid) fd->_ctx = GF_CALLOC (1, (sizeof (struct _fd_ctx) * fd->xl_count), gf_common_mt_fd_ctx); - if (!fd->_ctx) { - mem_put (fd); - fd = NULL; - goto out; - } + if (!fd->_ctx) + goto free_fd; + + fd->lk_ctx = fd_lk_ctx_create (); + if (!fd->lk_ctx) + goto free_fd_ctx; fd->inode = inode_ref (inode); fd->pid = pid; @@ -518,6 +520,13 @@ __fd_create (inode_t *inode, pid_t pid) LOCK_INIT (&fd->lock); out: return fd; + +free_fd_ctx: + GF_FREE (fd->_ctx); +free_fd: + mem_put (fd); + + return NULL; } diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h index d4cd9bd06..6b0ed891f 100644 --- a/libglusterfs/src/fd.h +++ b/libglusterfs/src/fd.h @@ -30,9 +30,11 @@ #include <unistd.h> #include "glusterfs.h" #include "locking.h" +#include "fd-lk.h" struct _inode; struct _dict; +struct fd_lk_ctx; struct _fd_ctx { union { @@ -59,6 +61,7 @@ struct _fd { 'struct _fd_ctx' array (_ctx).*/ struct _fd_ctx *_ctx; int xl_count; /* Number of xl referred in this fd */ + struct fd_lk_ctx *lk_ctx; }; typedef struct _fd fd_t; diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index b8c61d689..1ebf4d360 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -106,6 +106,8 @@ enum gf_common_mem_types_ { gf_common_mt_trie_end = 81, gf_common_mt_run_argv = 82, gf_common_mt_run_logbuf = 83, - gf_common_mt_end = 84 + gf_common_mt_fd_lk_ctx_t = 84, + gf_common_mt_fd_lk_ctx_node_t = 85, + gf_common_mt_end = 86, }; #endif diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 874f46e0b..827201e2d 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -73,6 +73,7 @@ enum gf_handshake_procnum { GF_HNDSK_SETVOLUME, GF_HNDSK_GETSPEC, GF_HNDSK_PING, + GF_HNDSK_SET_LK_VER, GF_HNDSK_MAXVALUE, }; diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index 540c72c82..adf6fd876 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -838,7 +838,6 @@ out: return; } - int rpc_clnt_notify (rpc_transport_t *trans, void *mydata, rpc_transport_event_t event, void *data, ...) diff --git a/rpc/xdr/src/glusterfs3-xdr.c b/rpc/xdr/src/glusterfs3-xdr.c index 8008a7470..47d7328db 100644 --- a/rpc/xdr/src/glusterfs3-xdr.c +++ b/rpc/xdr/src/glusterfs3-xdr.c @@ -1821,3 +1821,31 @@ xdr_gfs3_readdirp_rsp (XDR *xdrs, gfs3_readdirp_rsp *objp) return FALSE; return TRUE; } + +bool_t +xdr_gf_set_lk_ver_rsp (XDR *xdrs, gf_set_lk_ver_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_int (xdrs, &objp->lk_ver)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gf_set_lk_ver_req (XDR *xdrs, gf_set_lk_ver_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_string (xdrs, &objp->uid, ~0)) + return FALSE; + if (!xdr_int (xdrs, &objp->lk_ver)) + return FALSE; + return TRUE; +} diff --git a/rpc/xdr/src/glusterfs3-xdr.h b/rpc/xdr/src/glusterfs3-xdr.h index 2b8129a33..49e9d6cc0 100644 --- a/rpc/xdr/src/glusterfs3-xdr.h +++ b/rpc/xdr/src/glusterfs3-xdr.h @@ -1088,6 +1088,19 @@ struct gfs3_readdirp_rsp { }; typedef struct gfs3_readdirp_rsp gfs3_readdirp_rsp; +struct gf_set_lk_ver_rsp { + int op_ret; + int op_errno; + int lk_ver; +}; +typedef struct gf_set_lk_ver_rsp gf_set_lk_ver_rsp; + +struct gf_set_lk_ver_req { + char *uid; + int lk_ver; +}; +typedef struct gf_set_lk_ver_req gf_set_lk_ver_req; + /* the xdr functions */ #if defined(__STDC__) || defined(__cplusplus) @@ -1177,6 +1190,8 @@ extern bool_t xdr_gfs3_dirlist (XDR *, gfs3_dirlist*); extern bool_t xdr_gfs3_readdir_rsp (XDR *, gfs3_readdir_rsp*); extern bool_t xdr_gfs3_dirplist (XDR *, gfs3_dirplist*); extern bool_t xdr_gfs3_readdirp_rsp (XDR *, gfs3_readdirp_rsp*); +extern bool_t xdr_gf_set_lk_ver_rsp (XDR *, gf_set_lk_ver_rsp*); +extern bool_t xdr_gf_set_lk_ver_req (XDR *, gf_set_lk_ver_req*); #else /* K&R C */ extern bool_t xdr_gf_statfs (); @@ -1265,6 +1280,8 @@ extern bool_t xdr_gfs3_dirlist (); extern bool_t xdr_gfs3_readdir_rsp (); extern bool_t xdr_gfs3_dirplist (); extern bool_t xdr_gfs3_readdirp_rsp (); +extern bool_t xdr_gf_set_lk_ver_rsp (); +extern bool_t xdr_gf_set_lk_ver_req (); #endif /* K&R C */ diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x index 710a90378..f35820b57 100644 --- a/rpc/xdr/src/glusterfs3-xdr.x +++ b/rpc/xdr/src/glusterfs3-xdr.x @@ -675,3 +675,13 @@ struct gfs3_readdirp_rsp { opaque xdata<>; /* Extra data */ }; +struct gf_set_lk_ver_rsp { + int op_ret; + int op_errno; + int lk_ver; +}; + +struct gf_set_lk_ver_req { + string uid<>; + int lk_ver; +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 923271f0a..ac5378b1a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -203,6 +203,9 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {VKEY_FEATURES_LIMIT_USAGE, "features/quota", "limit-set", NULL, NO_DOC, 0}, {"features.quota-timeout", "features/quota", "timeout", "0", DOC, 0}, {"server.statedump-path", "protocol/server", "statedump-path", NULL, NO_DOC, 0}, + {"client.lk-heal", "protocol/client", "lk-heal", NULL, DOC, 0}, + {"client.grace-timeout", "protocol/client", "grace-timeout", NULL, DOC, 0}, + {"server.grace-timeout", "protocol/server", "grace-timeout", NULL, DOC, 0}, {NULL, } }; diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index e644290e4..e44aca1d0 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -3066,13 +3066,19 @@ static int fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct gf_flock *lock) { + uint32_t op = 0; fuse_state_t *state = NULL; state = frame->root->state; + op = state->finh->opcode; if (op_ret == 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, "%"PRIu64": ERR => 0", frame->root->unique); + fd_lk_insert_and_merge (state->fd, + (op == FUSE_SETLK) ? F_SETLK : F_SETLKW, + &state->lk_lock); + send_fuse_err (this, state->finh, 0); } else { if (op_errno == ENOSYS) { diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 91cda6d0c..1896e6b63 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -22,6 +22,7 @@ #include "config.h" #endif +#include "fd-lk.h" #include "client.h" #include "xlator.h" #include "defaults.h" @@ -39,6 +40,18 @@ extern rpc_clnt_prog_t clnt_pmap_prog; int client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe); +int client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe); + +int client_set_lk_version (xlator_t *this); + +typedef struct client_fd_lk_local { + int ref; + gf_boolean_t error; + gf_lock_t lock; + clnt_fd_ctx_t *fdctx; +}clnt_fd_lk_local_t; + /* Handshake */ void @@ -391,6 +404,411 @@ client_notify_parents_child_up (xlator_t *this) } int +client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + call_frame_t *fr = NULL; + gf_set_lk_ver_rsp rsp = {0,}; + + fr = (call_frame_t *) myframe; + GF_VALIDATE_OR_GOTO ("client", fr, out); + + if (req->rpc_status == -1) { + gf_log (fr->this->name, GF_LOG_WARNING, + "received RPC status error"); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_set_lk_ver_rsp); + if (ret < 0) + gf_log (fr->this->name, GF_LOG_WARNING, + "xdr decoding failed"); + else + gf_log (fr->this->name, GF_LOG_DEBUG, + "Server lk version = %d", rsp.lk_ver); + + ret = 0; +out: + if (fr) + STACK_DESTROY (fr->root); + + return ret; +} + +int +client_set_lk_version (xlator_t *this) +{ + int ret = -1; + clnt_conf_t *conf = NULL; + call_frame_t *frame = NULL; + gf_set_lk_ver_req req = {0, }; + + conf = (clnt_conf_t *) this->private; + + req.lk_ver = client_get_lk_ver (conf); + req.uid = this->ctx->process_uuid; + + gf_log (this->name, GF_LOG_DEBUG, "Sending SET_LK_VERSION"); + + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + + ret = client_submit_request (this, &req, frame, + conf->handshake, + GF_HNDSK_SET_LK_VER, + client_set_lk_version_cbk, + NULL, NULL, 0, NULL, 0, NULL, + (xdrproc_t)xdr_gf_set_lk_ver_req); +out: + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "Failed to send SET_LK_VERSION to server"); + + return ret; +} + +int +client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx) +{ + int ret = 1; + + GF_VALIDATE_OR_GOTO ("client", lk_ctx, out); + + LOCK (&lk_ctx->lock); + { + ret = list_empty (&lk_ctx->lk_list); + } + UNLOCK (&lk_ctx->lock); +out: + return ret; +} + +int +client_fd_lk_count (fd_lk_ctx_t *lk_ctx) +{ + int count = 0; + fd_lk_ctx_node_t *fd_lk = NULL; + + GF_VALIDATE_OR_GOTO ("client", lk_ctx, err); + + LOCK (&lk_ctx->lock); + { + list_for_each_entry (fd_lk, &lk_ctx->lk_list, next) + count++; + } + UNLOCK (&lk_ctx->lock); + + return count; +err: + return -1; +} + +clnt_fd_lk_local_t * +clnt_fd_lk_local_ref (xlator_t *this, clnt_fd_lk_local_t *local) +{ + GF_VALIDATE_OR_GOTO (this->name, local, out); + + LOCK (&local->lock); + { + local->ref++; + } + UNLOCK (&local->lock); +out: + return local; +} + +int +clnt_fd_lk_local_unref (xlator_t *this, clnt_fd_lk_local_t *local) +{ + int ref = -1; + + GF_VALIDATE_OR_GOTO (this->name, local, out); + + LOCK (&local->lock); + { + ref = --local->ref; + } + UNLOCK (&local->lock); + + if (ref == 0) { + LOCK_DESTROY (&local->lock); + GF_FREE (local); + } + ref = 0; +out: + return ref; +} + +clnt_fd_lk_local_t * +clnt_fd_lk_local_create (clnt_fd_ctx_t *fdctx) +{ + clnt_fd_lk_local_t *local = NULL; + + local = GF_CALLOC (1, sizeof (clnt_fd_lk_local_t), + gf_client_mt_clnt_fd_lk_local_t); + if (!local) + goto out; + + local->ref = 1; + local->error = _gf_false; + local->fdctx = fdctx; + + LOCK_INIT (&local->lock); +out: + return local; +} + +void +clnt_mark_fd_bad (clnt_conf_t *conf, clnt_fd_ctx_t *fdctx) +{ + pthread_mutex_lock (&conf->lock); + { + fdctx->remote_fd = -1; + } + pthread_mutex_unlock (&conf->lock); +} + +// call decrement_reopen_fd_count +int +clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + xlator_t *this = NULL; + call_frame_t *frame = NULL; + clnt_conf_t *conf = NULL; + clnt_fd_ctx_t *fdctx = NULL; + + frame = myframe; + this = frame->this; + fdctx = (clnt_fd_ctx_t *) frame->local; + conf = (clnt_conf_t *) this->private; + + clnt_mark_fd_bad (conf, fdctx); + + decrement_reopen_fd_count (this, conf); + + frame->local = NULL; + STACK_DESTROY (frame->root); + + return 0; +} + +int +clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ + int ret = -1; + clnt_conf_t *conf = NULL; + call_frame_t *frame = NULL; + gfs3_release_req req = {{0,},}; + + conf = (clnt_conf_t *) this->private; + + frame = create_frame (THIS, THIS->ctx->pool); + if (!frame) + goto out; + + frame->local = (void *) fdctx; + req.fd = fdctx->remote_fd; + + ret = client_submit_request (this, &req, frame, conf->fops, + GFS3_OP_RELEASE, + clnt_release_reopen_fd_cbk, NULL, + NULL, 0, NULL, 0, NULL, + (xdrproc_t)xdr_gfs3_releasedir_req); +out: + if (ret) { + decrement_reopen_fd_count (this, conf); + clnt_mark_fd_bad (conf, fdctx); + if (frame) { + frame->local = NULL; + STACK_DESTROY (frame->root); + } + } + + return 0; +} + +int +clnt_fd_lk_local_mark_error (xlator_t *this, + clnt_fd_lk_local_t *local) +{ + gf_boolean_t error = _gf_false; + + LOCK (&local->lock); + { + error = local->error; + local->error = _gf_true; + } + UNLOCK (&local->lock); + + if (error) + clnt_release_reopen_fd (this, local->fdctx); + + return 0; +} + +// Also, I think in reopen_cbk, the fdctx is added to +// saved_fd list.. avoid that, may cause a problem +// Reason: While the locks on the fd are reacquired, a release +// fop may be received by the client-protocol translator +// which will free the fdctx datastructure. +int +client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + xlator_t *this = NULL; + gf_common_rsp rsp = {0,}; + call_frame_t *frame = NULL; + clnt_fd_lk_local_t *local = NULL; + + frame = (call_frame_t *) myframe; + this = frame->this; + local = (clnt_fd_lk_local_t *) frame->local; + + if (req->rpc_status == -1) { + gf_log ("client", GF_LOG_WARNING, + "request failed at rpc"); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed"); + goto out; + } + + if (rsp.op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "lock request failed"); + ret = -1; + goto out; + } + + // TODO: Add more info to log. + gf_log (this->name, GF_LOG_DEBUG, "Reacquired lock"); + + ret = 0; +out: + if (ret < 0) + clnt_fd_lk_local_mark_error (this, local); + + (void) clnt_fd_lk_local_unref (this, local); + frame->local = NULL; + STACK_DESTROY (frame->root); + + return ret; +} + +int +_client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ + int32_t ret = -1; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + gfs3_lk_req req = {{0,},}; + struct gf_flock flock = {0,}; + fd_lk_ctx_t *lk_ctx = NULL; + clnt_fd_lk_local_t *local = NULL; + fd_lk_ctx_node_t *fd_lk = NULL; + call_frame_t *frame = NULL; + clnt_conf_t *conf = NULL; + + conf = (clnt_conf_t *) this->private; + lk_ctx = fdctx->lk_ctx; + + local = clnt_fd_lk_local_create (fdctx); + if (!local) { + clnt_release_reopen_fd (this, fdctx); + goto out; + } + + list_for_each_entry (fd_lk, &lk_ctx->lk_list, next) { + memcpy (&flock, &fd_lk->user_flock, + sizeof (struct gf_flock)); + + ret = client_cmd_to_gf_cmd (fd_lk->cmd, &gf_cmd); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "client_cmd_to_gf_cmd failed, " + "aborting reacquiring of locks"); + break; + } + + gf_type = client_type_to_gf_type (flock.l_type); + req.fd = fdctx->remote_fd; + req.cmd = gf_cmd; + req.type = gf_type; + (void) gf_proto_flock_from_flock (&req.flock, + &flock); + + memcpy (req.gfid, fdctx->inode->gfid, 16); + + frame = create_frame (THIS, THIS->ctx->pool); + if (!frame) { + ret = -1; + break; + } + + frame->local = clnt_fd_lk_local_ref (this, local); + frame->root->lk_owner = fd_lk->user_flock.l_owner; + + ret = client_submit_request (this, &req, frame, + conf->fops, GFS3_OP_LK, + client_reacquire_lock_cbk, + NULL, NULL, 0, NULL, 0, NULL, + (xdrproc_t)xdr_gfs3_lk_req); + if (ret) + break; + + ret = 0; + frame = NULL; + } + + if (ret) { + clnt_fd_lk_local_mark_error (this, local); + + if (frame) { + if (frame->local) { + clnt_fd_lk_local_unref (this, frame->local); + frame->local = NULL; + } + STACK_DESTROY (frame->root); + } + } + if (local) + (void) clnt_fd_lk_local_unref (this, local); +out: + return ret; +} + +int +client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ + int32_t ret = -1; + fd_lk_ctx_t *lk_ctx = NULL; + + if (client_fd_lk_list_empty (fdctx->lk_ctx)) { + gf_log (this->name, GF_LOG_WARNING, + "fd lock list is empty"); + decrement_reopen_fd_count (this, (clnt_conf_t *)this->private); + ret = 0; + goto out; + } + + lk_ctx = fdctx->lk_ctx; + + LOCK (&lk_ctx->lock); + { + ret = _client_reacquire_lock (this, fdctx); + } + UNLOCK (&lk_ctx->lock); +out: + return ret; +} + +int client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { @@ -402,11 +820,13 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, clnt_conf_t *conf = NULL; clnt_fd_ctx_t *fdctx = NULL; call_frame_t *frame = NULL; + xlator_t *this = NULL; frame = myframe; if (!frame || !frame->this) goto out; + this = frame->this; local = frame->local; conf = frame->this->private; @@ -454,7 +874,7 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->remote_fd = rsp.fd; if (!fdctx->released) { list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); - if (!list_empty (&fdctx->lock_list)) + if (!client_fd_lk_list_empty (fdctx->lk_ctx)) attempt_lock_recovery = _gf_true; fdctx = NULL; } @@ -463,31 +883,27 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, ret = 0; - attempt_lock_recovery = _gf_false; /* temporarily */ - - if (attempt_lock_recovery) { - ret = client_attempt_lock_recovery (frame->this, local->fdctx); - if (ret < 0) { - gf_log (frame->this->name, GF_LOG_DEBUG, - "lock recovery not attempted on fd"); - } else { - gf_log (frame->this->name, GF_LOG_INFO, - "need to attempt lock recovery on %"PRIu64 - " open fds", fd_count); - } + if (conf->lk_heal && attempt_lock_recovery) { + /* Delay decrement the reopen fd count untill all the + locks corresponding to this fd are acquired.*/ + gf_log (frame->this->name, GF_LOG_WARNING, "acquiring locks on " + "%s", local->loc.path); + ret = client_reacquire_lock (frame->this, local->fdctx); } else { fd_count = decrement_reopen_fd_count (frame->this, conf); } out: if (fdctx) - client_fdctx_destroy (frame->this, fdctx); + client_fdctx_destroy (this, fdctx); if ((ret < 0) && frame && frame->this && conf) decrement_reopen_fd_count (frame->this, conf); - frame->local = NULL; - STACK_DESTROY (frame->root); + if (frame) { + frame->local = NULL; + STACK_DESTROY (frame->root); + } client_local_wipe (local); @@ -792,7 +1208,8 @@ client_post_handshake (call_frame_t *frame, xlator_t *this) } } else { gf_log (this->name, GF_LOG_DEBUG, - "no open fds - notifying all parents child up"); + "no fds to open - notifying all parents child up"); + client_set_lk_version (this); client_notify_parents_child_up (this); } out: @@ -814,6 +1231,7 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m int32_t op_ret = 0; int32_t op_errno = 0; gf_boolean_t auth_fail = _gf_false; + uint32_t lk_ver = 0; frame = myframe; this = frame->this; @@ -895,6 +1313,15 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m goto out; } + ret = dict_get_uint32 (reply, "clnt-lk-version", &lk_ver); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to find key 'clnt-lk-version' in the options"); + goto out; + } + + gf_log (this->name, GF_LOG_INFO, "clnt-lk-version = %d, " + "server-lk-version = %d", client_get_lk_ver (conf), lk_ver); /* TODO: currently setpeer path is broken */ /* if (process_uuid && req->conn && @@ -930,8 +1357,15 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m conf->need_different_port = 0; - /* TODO: more to test */ - client_post_handshake (frame, frame->this); + if (lk_ver != client_get_lk_ver (conf)) { + client_mark_fd_bad (this); + client_post_handshake (frame, frame->this); + } else { + /*TODO: Traverse the saved fd list, and send + release to the server on fd's that were closed + during grace period */ + ; + } out: if (auth_fail) { @@ -1043,6 +1477,14 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc) "failed to set 'volfile-checksum'"); } + ret = dict_set_int16 (options, "clnt-lk-version", + client_get_lk_ver (conf)); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set clnt-lk-version(%"PRIu32") in handshake msg", + client_get_lk_ver (conf)); + } + req.dict.dict_len = dict_serialized_length (options); if (req.dict.dict_len < 0) { gf_log (this->name, GF_LOG_ERROR, @@ -1366,6 +1808,7 @@ char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = { [GF_HNDSK_SETVOLUME] = "SETVOLUME", [GF_HNDSK_GETSPEC] = "GETSPEC", [GF_HNDSK_PING] = "PING", + [GF_HNDSK_SET_LK_VER] = "SET_LK_VER" }; rpc_clnt_prog_t clnt_handshake_prog = { diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c index 842e3ec5b..e99fe774d 100644 --- a/xlators/protocol/client/src/client-lk.c +++ b/xlators/protocol/client/src/client-lk.c @@ -608,6 +608,7 @@ decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf) if (fd_count == 0) { gf_log (this->name, GF_LOG_INFO, "last fd open'd/lock-self-heal'd - notifying CHILD-UP"); + client_set_lk_version (this); client_notify_parents_child_up (this); } diff --git a/xlators/protocol/client/src/client-mem-types.h b/xlators/protocol/client/src/client-mem-types.h index c2aa690b1..6bc7daad2 100644 --- a/xlators/protocol/client/src/client-mem-types.h +++ b/xlators/protocol/client/src/client-mem-types.h @@ -29,6 +29,7 @@ enum gf_client_mem_types_ { gf_client_mt_clnt_req_buf_t, gf_client_mt_clnt_fdctx_t, gf_client_mt_clnt_lock_t, + gf_client_mt_clnt_fd_lk_local_t, gf_client_mt_end, }; #endif /* __CLIENT_MEM_TYPES_H__ */ diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 229e01917..8955e237d 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -40,6 +40,81 @@ int client_handshake (xlator_t *this, struct rpc_clnt *rpc); void client_start_ping (void *data); int client_init_rpc (xlator_t *this); int client_destroy_rpc (xlator_t *this); +int client_mark_fd_bad (xlator_t *this); + +int32_t +client_type_to_gf_type (short l_type) +{ + int32_t gf_type; + + switch (l_type) { + case F_RDLCK: + gf_type = GF_LK_F_RDLCK; + break; + case F_WRLCK: + gf_type = GF_LK_F_WRLCK; + break; + case F_UNLCK: + gf_type = GF_LK_F_UNLCK; + break; + } + + return gf_type; +} + +uint32_t +client_get_lk_ver (clnt_conf_t *conf) +{ + uint32_t lk_ver = 0; + + GF_VALIDATE_OR_GOTO ("client", conf, out); + + pthread_mutex_lock (&conf->lock); + { + lk_ver = conf->lk_version; + } + pthread_mutex_unlock (&conf->lock); +out: + return lk_ver; +} + +void +client_grace_timeout (void *data) +{ + int ver = 0; + xlator_t *this = NULL; + struct clnt_conf *conf = NULL; + struct rpc_clnt *rpc = NULL; + + GF_VALIDATE_OR_GOTO ("client", data, out); + + this = THIS; + + rpc = (struct rpc_clnt *) data; + + conf = (struct clnt_conf *) this->private; + + pthread_mutex_lock (&conf->lock); + { + ver = ++conf->lk_version; + /* ver == 0 is a special value used by server + to notify client that this is a fresh connect.*/ + if (ver == 0) + ver = ++conf->lk_version; + + gf_timer_call_cancel (this->ctx, conf->grace_timer); + conf->grace_timer = NULL; + } + pthread_mutex_unlock (&conf->lock); + + gf_log (this->name, GF_LOG_WARNING, + "client grace timer expired, updating " + "the lk-version to %d", ver); + + client_mark_fd_bad (this); +out: + return; +} int client_submit_request (xlator_t *this, void *req, call_frame_t *frame, @@ -828,7 +903,6 @@ out: } - int32_t client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) { @@ -1455,7 +1529,6 @@ out: return 0; } - int32_t client_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, struct gf_flock *lock) @@ -1841,7 +1914,7 @@ out: } - int +int client_mark_fd_bad (xlator_t *this) { clnt_conf_t *conf = NULL; @@ -1908,11 +1981,42 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, conf->last_sent_event = GF_EVENT_CHILD_UP; } } + + /* Cancel grace timer if set */ + pthread_mutex_lock (&conf->lock); + { + if (conf->grace_timer) { + gf_log (this->name, GF_LOG_WARNING, + "Cancelling the grace timer"); + + gf_timer_call_cancel (this->ctx, + conf->grace_timer); + conf->grace_timer = NULL; + } + } + pthread_mutex_unlock (&conf->lock); + break; } case RPC_CLNT_DISCONNECT: + /* client_mark_fd_bad (this); */ - client_mark_fd_bad (this); + pthread_mutex_lock (&conf->lock); + { + if (conf->grace_timer) { + gf_log (this->name, GF_LOG_DEBUG, + "Client grace timer is already set"); + } else { + gf_log (this->name, GF_LOG_WARNING, + "Registering a grace timer"); + conf->grace_timer = + gf_timer_call_after (this->ctx, + conf->grace_tv, + client_grace_timeout, + conf->rpc); + } + } + pthread_mutex_unlock (&conf->lock); if (!conf->skip_notify) { if (conf->connected) @@ -2107,6 +2211,40 @@ out: int +client_init_grace_timer (xlator_t *this, dict_t *options, + clnt_conf_t *conf) +{ + char *lk_heal = NULL; + int32_t ret = -1; + int32_t grace_timeout = -1; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, options, out); + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + conf->lk_heal = _gf_true; + + ret = dict_get_str (options, "lk-heal", &lk_heal); + if (!ret) + gf_string2boolean (lk_heal, &conf->lk_heal); + + ret = dict_get_int32 (options, "grace-timeout", &grace_timeout); + if (!ret) + conf->grace_tv.tv_sec = grace_timeout; + else + conf->grace_tv.tv_sec = 10; + + conf->grace_tv.tv_usec = 0; + + gf_log (this->name, GF_LOG_INFO, "lk-heal = %s", + (conf->lk_heal) ? "on" : "off"); + + ret = 0; +out: + return ret; +} + +int reconfigure (xlator_t *this, dict_t *options) { clnt_conf_t *conf = NULL; @@ -2153,6 +2291,10 @@ reconfigure (xlator_t *this, dict_t *options) } } + ret = client_init_grace_timer (this, options, conf); + if (ret) + goto out; + ret = 0; out: return ret; @@ -2186,6 +2328,14 @@ init (xlator_t *this) pthread_mutex_init (&conf->lock, NULL); INIT_LIST_HEAD (&conf->saved_fds); + /* Initialize parameters for lock self healing*/ + conf->lk_version = 1; + conf->grace_timer = NULL; + + ret = client_init_grace_timer (this, this->options, conf); + if (ret) + goto out; + LOCK_INIT (&conf->rec_lock); conf->last_sent_event = -1; /* To start with we don't have any events */ @@ -2207,7 +2357,6 @@ init (xlator_t *this) goto out; } - ret = client_init_rpc (this); out: if (ret) @@ -2409,5 +2558,11 @@ struct volume_options options[] = { { .key = {"client-bind-insecure"}, .type = GF_OPTION_TYPE_BOOL }, + { .key = {"lk-heal"}, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"grace-timeout"}, + .type = GF_OPTION_TYPE_INT + }, { .key = {NULL} }, }; diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 2dda451c9..00addf34c 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -29,6 +29,7 @@ #include "client-mem-types.h" #include "protocol-common.h" #include "glusterfs3.h" +#include "fd-lk.h" /* FIXME: Needs to be defined in a common file */ #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect" @@ -91,6 +92,12 @@ typedef struct clnt_conf { char need_different_port; /* flag used to change the portmap path in case of 'tcp,rdma' on server */ + gf_boolean_t lk_heal; + uint16_t lk_version; /* this variable is used to distinguish + client-server transaction while + performing lock healing */ + struct timeval grace_tv; + gf_timer_t *grace_timer; } clnt_conf_t; typedef struct _client_fd_ctx { @@ -105,7 +112,7 @@ typedef struct _client_fd_ctx { char released; int32_t flags; int32_t wbflags; - + fd_lk_ctx_t *lk_ctx; pthread_mutex_t mutex; struct list_head lock_list; /* List of all granted locks on this fd */ } clnt_fd_ctx_t; @@ -211,4 +218,11 @@ int32_t client_dump_locks (char *name, inode_t *inode, dict_t *dict); int client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx); +uint32_t client_get_lk_ver (clnt_conf_t *conf); + +int32_t client_type_to_gf_type (short l_type); + +int client_mark_fd_bad (xlator_t *this); + +int client_set_lk_version (xlator_t *this); #endif /* !_CLIENT_H */ diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c index 76d4fb0d6..4d6d57528 100644 --- a/xlators/protocol/client/src/client3_1-fops.c +++ b/xlators/protocol/client/src/client3_1-fops.c @@ -351,6 +351,7 @@ client3_1_open_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->inode = inode_ref (fd->inode); fdctx->flags = local->flags; fdctx->wbflags = local->wbflags; + fdctx->lk_ctx = fd_lk_ctx_ref (fd->lk_ctx); INIT_LIST_HEAD (&fdctx->sfd_pos); INIT_LIST_HEAD (&fdctx->lock_list); @@ -2279,17 +2280,30 @@ client3_1_releasedir_cbk (struct rpc_req *req, struct iovec *iov, int count, int client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx) { + clnt_conf_t *conf = NULL; call_frame_t *fr = NULL; int32_t ret = -1; + fd_lk_ctx_t *lk_ctx = NULL; if (!fdctx) goto out; + conf = (clnt_conf_t *) this->private; + if (fdctx->remote_fd == -1) { gf_log (this->name, GF_LOG_DEBUG, "not a valid fd"); goto out; } + pthread_mutex_lock (&conf->lock); + { + lk_ctx = fdctx->lk_ctx; + fdctx->lk_ctx = NULL; + } + pthread_mutex_unlock (&conf->lock); + + fd_lk_ctx_unref (lk_ctx); + fr = create_frame (this, this->ctx->pool); if (fdctx->is_dir) { @@ -4466,7 +4480,6 @@ unwind: return 0; } - int32_t client3_1_lk (call_frame_t *frame, xlator_t *this, void *data) @@ -4523,6 +4536,7 @@ client3_1_lk (call_frame_t *frame, xlator_t *this, req.cmd = gf_cmd; req.type = gf_type; gf_proto_flock_from_flock (&req.flock, args->flock); + memcpy (req.gfid, args->fd->inode->gfid, 16); ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_LK, diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index 2c8cf059b..374f5a49a 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -354,6 +354,7 @@ server_setvolume (rpcsvc_request_t *req) int32_t op_errno = EINVAL; int32_t fop_version = 0; int32_t mgmt_version = 0; + uint32_t lk_version = 0; char *buf = NULL; params = dict_new (); @@ -408,8 +409,33 @@ server_setvolume (rpcsvc_request_t *req) goto fail; } + /*lk_verion :: [1..2^31-1]*/ + ret = dict_get_uint32 (params, "clnt-lk-version", &lk_version); + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "lock state verison not supplied"); + if (ret < 0) + gf_log (this->name, GF_LOG_DEBUG, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } conn = server_connection_get (this, process_uuid); + if (!conn) { + op_ret = -1; + op_errno = ENOMEM; + goto fail; + } + + server_cancel_conn_timer (this, conn); + if (conn->lk_version != 0 && + conn->lk_version != lk_version) { + (void) server_connection_cleanup (this, conn); + } + if (req->trans->xl_private != conn) req->trans->xl_private = conn; @@ -595,6 +621,12 @@ server_setvolume (rpcsvc_request_t *req) gf_log (this->name, GF_LOG_DEBUG, "failed to set 'process-uuid'"); + ret = dict_set_uint32 (reply, "clnt-lk-version", + conn->lk_version); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to set 'clnt-lk-version'"); + ret = dict_set_uint64 (reply, "transport-ptr", ((uint64_t) (long) req->trans)); if (ret) @@ -663,12 +695,50 @@ server_ping (rpcsvc_request_t *req) return 0; } +int +server_set_lk_version (rpcsvc_request_t *req) +{ + int op_ret = -1; + int op_errno = EINVAL; + gf_set_lk_ver_req args = {0, }; + gf_set_lk_ver_rsp rsp = {0,}; + server_connection_t *conn = NULL; + xlator_t *this = NULL; + + this = req->svc->mydata; + //TODO: Decide on an appropriate errno for the error-path + //below + if (!this) + goto fail; + + if (!xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_set_lk_ver_req)) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto fail; + } + + conn = server_connection_get (this, args.uid); + conn->lk_version = args.lk_ver; + server_connection_put (this, conn); + + rsp.lk_ver = args.lk_ver; + + op_ret = 0; +fail: + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + server_submit_reply (NULL, req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_set_lk_ver_rsp); + return 0; +} rpcsvc_actor_t gluster_handshake_actors[] = { - [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, server_null, NULL, NULL, 0}, - [GF_HNDSK_SETVOLUME] = {"SETVOLUME", GF_HNDSK_SETVOLUME, server_setvolume, NULL, NULL, 0}, - [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, NULL, 0}, - [GF_HNDSK_PING] = {"PING", GF_HNDSK_PING, server_ping, NULL, NULL, 0}, + [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, server_null, NULL, NULL, 0}, + [GF_HNDSK_SETVOLUME] = {"SETVOLUME", GF_HNDSK_SETVOLUME, server_setvolume, NULL, NULL, 0}, + [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, NULL, 0}, + [GF_HNDSK_PING] = {"PING", GF_HNDSK_PING, server_ping, NULL, NULL, 0}, + [GF_HNDSK_SET_LK_VER] = {"SET_LK_VER", GF_HNDSK_SET_LK_VER, server_set_lk_version, NULL, NULL }, }; diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index 4980424d3..9de1082dc 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -774,6 +774,7 @@ server_connection_t * server_connection_get (xlator_t *this, const char *id) { server_connection_t *conn = NULL; + server_connection_t *trav = NULL; server_conf_t *conf = NULL; GF_VALIDATE_OR_GOTO ("server", this, out); @@ -783,20 +784,29 @@ server_connection_get (xlator_t *this, const char *id) pthread_mutex_lock (&conf->mutex); { + list_for_each_entry (trav, &conf->conns, list) { + if (!strncmp (trav->id, id, strlen (id))) { + conn = trav; + conn->ref++; + goto unlock; + } + } + conn = (void *) GF_CALLOC (1, sizeof (*conn), gf_server_mt_conn_t); if (!conn) goto unlock; conn->id = gf_strdup (id); + /*'0' denotes uninitialised lock state*/ + conn->lk_version = 0; + conn->ref++; conn->fdtable = gf_fd_fdtable_alloc (); conn->ltable = gf_lock_table_new (); conn->this = this; pthread_mutex_init (&conn->lock, NULL); - list_add (&conn->list, &conf->conns); - conn->ref++; } unlock: pthread_mutex_unlock (&conf->mutex); @@ -982,6 +992,17 @@ out: return ret; } +void +put_server_conn_state (xlator_t *this, rpc_transport_t *xprt) +{ + GF_VALIDATE_OR_GOTO ("server", this, out); + GF_VALIDATE_OR_GOTO ("server", xprt, out); + + xprt->xl_private = NULL; +out: + return; +} + server_connection_t * get_server_conn_state (xlator_t *this, rpc_transport_t *xprt) { @@ -1497,3 +1518,26 @@ gf_server_check_setxattr_cmd (call_frame_t *frame, dict_t *dict) return 0; } + +void +server_cancel_conn_timer (xlator_t *this, server_connection_t *conn) +{ + if (!this || !conn) { + gf_log (THIS->name, GF_LOG_ERROR, "Invalid arguments to " + "cancel connection timer"); + return; + } + + pthread_mutex_lock (&conn->lock); + { + if (!conn->timer) + goto unlock; + + gf_timer_call_cancel (this->ctx, conn->timer); + conn->timer = NULL; + } +unlock: + pthread_mutex_unlock (&conn->lock); + + return; +} diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h index 844c98c27..99ba7e546 100644 --- a/xlators/protocol/server/src/server-helpers.h +++ b/xlators/protocol/server/src/server-helpers.h @@ -68,6 +68,12 @@ server_print_request (call_frame_t *frame); call_frame_t * get_frame_from_request (rpcsvc_request_t *req); +void +server_cancel_conn_timer (xlator_t *this, server_connection_t *conn); + +void +put_server_conn_state (xlator_t *this, rpc_transport_t *xprt); + server_connection_t * get_server_conn_state (xlator_t *this, rpc_transport_t *xptr); diff --git a/xlators/protocol/server/src/server-mem-types.h b/xlators/protocol/server/src/server-mem-types.h index 88bae8cb4..5438ed6db 100644 --- a/xlators/protocol/server/src/server-mem-types.h +++ b/xlators/protocol/server/src/server-mem-types.h @@ -33,6 +33,7 @@ enum gf_server_mem_types_ { gf_server_mt_dirent_rsp_t, gf_server_mt_rsp_buf_t, gf_server_mt_volfile_ctx_t, + gf_server_mt_timer_data_t, gf_server_mt_end, }; #endif /* __SERVER_MEM_TYPES_H__ */ diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index b0697bb7b..b45b77baa 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -36,6 +36,26 @@ #include "authenticate.h" #include "rpcsvc.h" +void +grace_time_handler (void *data) +{ + server_connection_t *conn = NULL; + xlator_t *this = NULL; + + conn = data; + this = conn->this; + + GF_VALIDATE_OR_GOTO (THIS->name, conn, out); + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + + gf_log (this->name, GF_LOG_INFO, "grace timer expired"); + + server_cancel_conn_timer (this, conn); + server_connection_put (this, conn); +out: + return; +} + struct iobuf * gfs_serialize_reply (rpcsvc_request_t *req, void *arg, struct iovec *outmsg, xdrproc_t xdrproc) @@ -554,11 +574,10 @@ int server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) { - xlator_t *this = NULL; - rpc_transport_t *xprt = NULL; - server_connection_t *conn = NULL; - server_conf_t *conf = NULL; - + xlator_t *this = NULL; + rpc_transport_t *xprt = NULL; + server_connection_t *conn = NULL; + server_conf_t *conf = NULL; if (!xl || !data) { gf_log_callingfn ("server", GF_LOG_WARNING, @@ -589,20 +608,37 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, } case RPCSVC_EVENT_DISCONNECT: conn = get_server_conn_state (this, xprt); - if (conn) - server_connection_cleanup (this, conn); - - gf_log (this->name, GF_LOG_INFO, - "disconnected connection from %s", - xprt->peerinfo.identifier); + if (!conn) + break; + put_server_conn_state (this, xprt); + gf_log (this->name, GF_LOG_INFO, "disconnecting connection" + "from %s", xprt->peerinfo.identifier); list_del (&xprt->list); + pthread_mutex_lock (&conn->lock); + { + if (conn->timer) + goto unlock; + + gf_log (this->name, GF_LOG_INFO, "starting a grace " + "timer for %s", xprt->name); + + conn->timer = gf_timer_call_after (this->ctx, + conf->grace_tv, + grace_time_handler, + conn); + } + unlock: + pthread_mutex_unlock (&conn->lock); + break; case RPCSVC_EVENT_TRANSPORT_DESTROY: - conn = get_server_conn_state (this, xprt); - if (conn) - server_connection_put (this, conn); + /*- conn obj has been disassociated from xprt on first + * disconnect. + * conn cleanup and destruction is handed over to + * grace_time_handler or the subsequent handler that 'owns' + * the conn. Nothing left to be done here. */ break; default: break; @@ -668,6 +704,30 @@ _copy_auth_opt (dict_t *unused, int +server_init_grace_timer (xlator_t *this, dict_t *options, + server_conf_t *conf) +{ + int32_t ret = -1; + int32_t grace_timeout = -1; + + GF_VALIDATE_OR_GOTO ("server", this, out); + GF_VALIDATE_OR_GOTO (this->name, options, out); + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + ret = dict_get_int32 (options, "grace-timeout", &grace_timeout); + if (!ret) + conf->grace_tv.tv_sec = grace_timeout; + else + conf->grace_tv.tv_sec = 10; + + conf->grace_tv.tv_usec = 0; + + ret = 0; +out: + return ret; +} + +int reconfigure (xlator_t *this, dict_t *options) { @@ -761,6 +821,7 @@ reconfigure (xlator_t *this, dict_t *options) "Reconfigure not found for transport" ); } } + ret = server_init_grace_timer (this, options, conf); out: gf_log ("", GF_LOG_DEBUG, "returning %d", ret); @@ -797,6 +858,10 @@ init (xlator_t *this) INIT_LIST_HEAD (&conf->xprt_list); pthread_mutex_init (&conf->mutex, NULL); + ret = server_init_grace_timer (this, this->options, conf); + if (ret) + goto out; + ret = server_build_config (this, conf); if (ret) goto out; @@ -1032,5 +1097,8 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_PATH, .default_value = "/tmp" }, + {.key = {"grace-timeout"}, + .type = GF_OPTION_TYPE_INT, + }, { .key = {NULL} }, }; diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index 92785c5a9..091a02ccb 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -28,6 +28,7 @@ #include "protocol-common.h" #include "server-mem-types.h" #include "glusterfs3.h" +#include "timer.h" #define DEFAULT_BLOCK_SIZE 4194304 /* 4MB */ #define DEFAULT_VOLUME_FILE_PATH CONFDIR "/glusterfs.vol" @@ -60,8 +61,10 @@ struct _server_connection { pthread_mutex_t lock; fdtable_t *fdtable; struct _lock_table *ltable; + gf_timer_t *timer; xlator_t *bound_xl; xlator_t *this; + uint32_t lk_version; }; typedef struct _server_connection server_connection_t; @@ -92,7 +95,7 @@ struct server_conf { gf_boolean_t trace; char *conf_dir; struct _volfile_ctx *volfile; - + struct timeval grace_tv; dict_t *auth_modules; pthread_mutex_t mutex; struct list_head conns; |