diff options

| field | value | date |
|---|---|---|
| author | Pavan Sondur <pavan@gluster.com> | 2010-09-30 02:25:31 +0000 |
| committer | Vijay Bellur <vijay@dev.gluster.com> | 2010-09-30 11:19:24 -0700 |
| commit | af18c636c44b1ea56296850e55afe0e4b2ce845c (patch) | |
| tree | 40f8470ec000b96d61b3f8d53286aa0812c9d921 /xlators/protocol/client/src | |
| parent | 760daf28898cbb8b5072551735bebee16450ba08 (diff) | |

protocol/client: cluster/afr: Support lock recovery and self heal.
Signed-off-by: Pavan Vilas Sondur <pavan@gluster.com>
Signed-off-by: Vijay Bellur <vijay@dev.gluster.com>
BUG: 865 (Add locks recovery support in GlusterFS)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=865
Diffstat (limited to 'xlators/protocol/client/src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | xlators/protocol/client/src/Makefile.am | 2 |
| -rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 65 |
| -rw-r--r-- | xlators/protocol/client/src/client-helpers.c | 22 |
| -rw-r--r-- | xlators/protocol/client/src/client-lk.c | 949 |
| -rw-r--r-- | xlators/protocol/client/src/client-mem-types.h | 1 |
| -rw-r--r-- | xlators/protocol/client/src/client.c | 2 |
| -rw-r--r-- | xlators/protocol/client/src/client.h | 43 |
| -rw-r--r-- | xlators/protocol/client/src/client3_1-fops.c | 172 |

8 files changed, 1193 insertions, 63 deletions
diff --git a/xlators/protocol/client/src/Makefile.am b/xlators/protocol/client/src/Makefile.am index 007810e9901..f2dea68d70d 100644 --- a/xlators/protocol/client/src/Makefile.am +++ b/xlators/protocol/client/src/Makefile.am @@ -9,7 +9,7 @@ client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la client_la_SOURCES = client.c client-helpers.c client3_1-fops.c \ - client-handshake.c client-callback.c + client-handshake.c client-callback.c client-lk.c noinst_HEADERS = client.h client-mem-types.h AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 7511813d3cb..8b0c90ebc8c 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -343,14 +343,38 @@ unwind: } int +client_notify_parents_child_up (xlator_t *this) +{ + xlator_list_t *parent = NULL; + + /* As fuse is not 'parent' of any translator now, triggering its + CHILD_UP event is hacky in case client has only client protocol */ + if (!this->parents && this->ctx && this->ctx->master) { + /* send notify to 'ctx->master' if it exists */ + xlator_notify (this->ctx->master, GF_EVENT_CHILD_UP, + this->graph); + } + + parent = this->parents; + while (parent) { + xlator_notify (parent->xlator, GF_EVENT_CHILD_UP, + this); + parent = parent->next; + } + + return 0; +} + +int client_post_handshake (call_frame_t *frame, xlator_t *this) { clnt_conf_t *conf = NULL; clnt_fd_ctx_t *tmp = NULL; clnt_fd_ctx_t *fdctx = NULL; - xlator_list_t *parent = NULL; struct list_head reopen_head; + int count = 0; + if (!this || !this->private) goto out; @@ -366,34 +390,33 @@ client_post_handshake (call_frame_t *frame, xlator_t *this) list_del_init (&fdctx->sfd_pos); list_add_tail (&fdctx->sfd_pos, &reopen_head); + count++; } } pthread_mutex_unlock (&conf->lock); - list_for_each_entry_safe (fdctx, 
tmp, &reopen_head, sfd_pos) { - list_del_init (&fdctx->sfd_pos); + /* Delay notifying CHILD_UP to parents + until all locks are recovered */ + if (count > 0) { + gf_log (this->name, GF_LOG_TRACE, + "%d fds open - Delaying child_up until they are re-opened", + count); + client_save_number_fds (conf, count); - if (fdctx->is_dir) - protocol_client_reopendir (this, fdctx); - else - protocol_client_reopen (this, fdctx); - } + list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { + list_del_init (&fdctx->sfd_pos); - /* As fuse is not 'parent' of any translator now, triggering its - CHILD_UP event is hacky in case client has only client protocol */ - if (!this->parents && this->ctx && this->ctx->master) { - /* send notify to 'ctx->master' if it exists */ - xlator_notify (this->ctx->master, GF_EVENT_CHILD_UP, - this->graph); - } + if (fdctx->is_dir) + protocol_client_reopendir (this, fdctx); + else + protocol_client_reopen (this, fdctx); + } + } else { + gf_log (this->name, GF_LOG_TRACE, + "No open fds - notifying all parents child up"); + client_notify_parents_child_up (this); - parent = this->parents; - while (parent) { - xlator_notify (parent->xlator, GF_EVENT_CHILD_UP, - this); - parent = parent->next; } - out: return 0; } diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index d8da60aa1ae..d1282d50c90 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -76,16 +76,26 @@ this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, clnt_fd_ctx_t *ctx) ret = fd_ctx_get (file, this, &oldaddr); if (ret >= 0) { - gf_log (this->name, GF_LOG_DEBUG, - "%s (%"PRId64"): trying duplicate remote fd set. ", - loc->path, loc->inode->ino); + if (loc) + gf_log (this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): trying duplicate remote fd set. ", + loc->path, loc->inode->ino); + else + gf_log (this->name, GF_LOG_DEBUG, + "%p: trying duplicate remote fd set. 
", + file); } ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "%s (%"PRId64"): failed to set remote fd", - loc->path, loc->inode->ino); + if (loc) + gf_log (this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): failed to set remote fd", + loc->path, loc->inode->ino); + else + gf_log (this->name, GF_LOG_DEBUG, + "%p: failed to set remote fd", + file); } out: return; diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c new file mode 100644 index 00000000000..4ad7fc2b1ec --- /dev/null +++ b/xlators/protocol/client/src/client-lk.c @@ -0,0 +1,949 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include "common-utils.h" +#include "xlator.h" +#include "client.h" + +static void +__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock); + +static int +client_send_recovery_lock (call_frame_t *frame, xlator_t *this, + client_posix_lock_t *lock); +static void +__dump_client_lock (client_posix_lock_t *lock) +{ + xlator_t *this = NULL; + + this = THIS; + + gf_log (this->name, GF_LOG_TRACE, + "{fd=%lld}" + "{%s lk-owner:%"PRIu64" %"PRId64" - %"PRId64"}" + "{start=%"PRId64" end=%"PRId64"}", + (unsigned long long)lock->fd, + lock->fl_type == F_WRLCK ? 
"Write-Lock" : "Read-Lock", + lock->owner, + lock->user_flock.l_start, + lock->user_flock.l_len, + lock->fl_start, + lock->fl_end); +} + +static int +dump_client_locks_fd (clnt_fd_ctx_t *fdctx) +{ + client_posix_lock_t *lock = NULL; + int count = 0; + + pthread_mutex_lock (&fdctx->mutex); + { + list_for_each_entry (lock, &fdctx->lock_list, list) { + __dump_client_lock (lock); + count++; + } + } + pthread_mutex_unlock (&fdctx->mutex); + + return count; + +} + +int +dump_client_locks (inode_t *inode) +{ + fd_t *fd = NULL; + clnt_conf_t *conf = NULL; + xlator_t *this = NULL; + clnt_fd_ctx_t *fdctx = NULL; + + int total_count = 0; + int locks_fd_count = 0; + + this = THIS; + conf = this->private; + + LOCK (&inode->lock); + { + list_for_each_entry (fd, &inode->fd_list, inode_list) { + locks_fd_count = 0; + + pthread_mutex_lock (&conf->lock); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->lock); + + if (fdctx) + locks_fd_count = dump_client_locks_fd (fdctx); + + total_count += locks_fd_count; + } + + } + UNLOCK (&inode->lock); + + return total_count; + +} + +static off_t +__get_lock_length (off_t start, off_t end) +{ + if (end == LLONG_MAX) + return 0; + else + return (end - start + 1); +} + +/* Add two locks */ +static client_posix_lock_t * +add_locks (client_posix_lock_t *l1, client_posix_lock_t *l2) +{ + client_posix_lock_t *sum = NULL; + + sum = GF_CALLOC (1, sizeof (*sum), gf_client_mt_clnt_lock_t); + if (!sum) + return NULL; + + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); + + sum->user_flock.l_start = sum->fl_start; + sum->user_flock.l_len = __get_lock_length (sum->fl_start, + sum->fl_end); + + return sum; +} + +/* Return true if the locks have the same owner */ +static int +same_owner (client_posix_lock_t *l1, client_posix_lock_t *l2) +{ + return ((l1->owner == l2->owner)); +} + +/* Return true if the locks overlap, false otherwise */ +static int +locks_overlap (client_posix_lock_t 
*l1, client_posix_lock_t *l2) +{ + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ + + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); +} + +static void +__delete_client_lock (client_posix_lock_t *lock) +{ + list_del_init (&lock->list); +} + +/* Destroy a posix_lock */ +static void +__destroy_client_lock (client_posix_lock_t *lock) +{ + free (lock); +} + +/* Subtract two locks */ +struct _values { + client_posix_lock_t *locks[3]; +}; + +/* {big} must always be contained inside {small} */ +static struct _values +subtract_locks (client_posix_lock_t *big, client_posix_lock_t *small) +{ + struct _values v = { .locks = {0, 0, 0} }; + + if ((big->fl_start == small->fl_start) && + (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t ); + GF_ASSERT (v.locks[0]); + memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); + v.locks[0]->fl_type = small->fl_type; + } + else if ((small->fl_start > big->fl_start) && + (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[0]); + v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[1]); + v.locks[2] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[2]); + + memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start, + v.locks[0]->fl_end); + + memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); + memcpy (v.locks[2], big, sizeof (client_posix_lock_t)); + v.locks[2]->fl_start = small->fl_end + 1; + v.locks[2]->user_flock.l_start = small->fl_end + 1; + } + /* one edge coincides with big */ + else if (small->fl_start 
== big->fl_start) { + v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[0]); + v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); + v.locks[0]->fl_start = small->fl_end + 1; + v.locks[0]->user_flock.l_start = small->fl_end + 1; + + memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); + } + else if (small->fl_end == big->fl_end) { + v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[0]); + v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start, + v.locks[0]->fl_end); + + memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); + } + else { + gf_log ("client-protocol", GF_LOG_ERROR, + "Unexpected case in subtract_locks. 
Please send " + "a bug report to gluster-devel@nongnu.org"); + } + + return v; +} + +static void +__delete_unlck_locks (clnt_fd_ctx_t *fdctx) +{ + client_posix_lock_t *l = NULL; + client_posix_lock_t *tmp = NULL; + + list_for_each_entry_safe (l, tmp, &fdctx->lock_list, list) { + if (l->fl_type == F_UNLCK) { + __delete_client_lock (l); + __destroy_client_lock (l); + } + } +} + +static void +__insert_lock (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ + list_add_tail (&lock->list, &fdctx->lock_list); + + return; +} + +static void +__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ + client_posix_lock_t *conf = NULL; + client_posix_lock_t *t = NULL; + client_posix_lock_t *sum = NULL; + int i = 0; + struct _values v = { .locks = {0, 0, 0} }; + + list_for_each_entry_safe (conf, t, &fdctx->lock_list, list) { + if (!locks_overlap (conf, lock)) + continue; + + if (same_owner (conf, lock)) { + if (conf->fl_type == lock->fl_type) { + sum = add_locks (lock, conf); + + sum->fd = lock->fd; + + __delete_client_lock (conf); + __destroy_client_lock (conf); + + __destroy_client_lock (lock); + __insert_and_merge (fdctx, sum); + + return; + } else { + sum = add_locks (lock, conf); + + sum->fd = conf->fd; + sum->owner = conf->owner; + + v = subtract_locks (sum, lock); + + __delete_client_lock (conf); + __destroy_client_lock (conf); + + __delete_client_lock (lock); + __destroy_client_lock (lock); + + __destroy_client_lock (sum); + + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; + + INIT_LIST_HEAD (&v.locks[i]->list); + __insert_and_merge (fdctx, + v.locks[i]); + } + + __delete_unlck_locks (fdctx); + return; + } + } + + if (lock->fl_type == F_UNLCK) { + continue; + } + + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + __insert_lock (fdctx, lock); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + __insert_lock (fdctx, lock); + } else { + __destroy_client_lock (lock); + } +} + +static void 
+client_setlk (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ + pthread_mutex_lock (&fdctx->mutex); + { + __insert_and_merge (fdctx, lock); + } + pthread_mutex_unlock (&fdctx->mutex); + + return; +} + +static void +destroy_client_lock (client_posix_lock_t *lock) +{ + GF_FREE (lock); +} + +int32_t +delete_granted_locks_owner (fd_t *fd, uint64_t owner) +{ + clnt_fd_ctx_t *fdctx = NULL; + client_posix_lock_t *lock = NULL; + client_posix_lock_t *tmp = NULL; + xlator_t *this = NULL; + + struct list_head delete_list; + int ret = 0; + int count = 0; + + INIT_LIST_HEAD (&delete_list); + this = THIS; + fdctx = this_fd_get_ctx (fd, this); + if (!fdctx) { + gf_log (this->name, GF_LOG_DEBUG, + "fdctx not valid"); + ret = -1; + goto out; + } + + pthread_mutex_lock (&fdctx->mutex); + { + list_for_each_entry_safe (lock, tmp, &fdctx->lock_list, list) { + if (lock->owner == owner) { + list_del_init (&lock->list); + list_add_tail (&lock->list, &delete_list); + count++; + } + } + } + pthread_mutex_unlock (&fdctx->mutex); + + list_for_each_entry_safe (lock, tmp, &delete_list, list) { + list_del_init (&lock->list); + destroy_client_lock (lock); + } + +/* FIXME: Need to actually print the locks instead of count */ + gf_log (this->name, GF_LOG_DEBUG, + "Number of locks cleared=%d", count); + +out: + return ret; +} + +int32_t +delete_granted_locks_fd (clnt_fd_ctx_t *fdctx) +{ + client_posix_lock_t *lock = NULL; + client_posix_lock_t *tmp = NULL; + xlator_t *this = NULL; + + struct list_head delete_list; + int ret = 0; + int count = 0; + + INIT_LIST_HEAD (&delete_list); + this = THIS; + + pthread_mutex_lock (&fdctx->mutex); + { + list_splice_init (&fdctx->lock_list, &delete_list); + } + pthread_mutex_unlock (&fdctx->mutex); + + list_for_each_entry_safe (lock, tmp, &delete_list, list) { + list_del_init (&lock->list); + count++; + destroy_client_lock (lock); + } + + /* FIXME: Need to actually print the locks instead of count */ + gf_log (this->name, GF_LOG_DEBUG, + "Number of locks 
cleared=%d", count); + + return ret; +} + +static void +client_mark_bad_fd (fd_t *fd, clnt_fd_ctx_t *fdctx) +{ + xlator_t *this = NULL; + + this = THIS; + if (fdctx) + fdctx->remote_fd = -1; + this_fd_set_ctx (fd, this, NULL, fdctx); +} + +int32_t +client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd) +{ + int ret = 0; + + if (cmd == F_GETLK || cmd == F_GETLK64) + *gf_cmd = GF_LK_GETLK; + else if (cmd == F_SETLK || cmd == F_SETLK64) + *gf_cmd = GF_LK_SETLK; + else if (cmd == F_SETLKW || cmd == F_SETLKW64) + *gf_cmd = GF_LK_SETLKW; + else if (cmd == F_RESLK_LCK) + *gf_cmd = GF_LK_RESLK_LCK; + else if (cmd == F_RESLK_LCKW) + *gf_cmd = GF_LK_RESLK_LCKW; + else if (cmd == F_RESLK_UNLCK) + *gf_cmd = GF_LK_RESLK_UNLCK; + else + ret = -1; + + return ret; + +} + +static client_posix_lock_t * +new_client_lock (struct flock *flock, uint64_t owner, + int32_t cmd, fd_t *fd) +{ + client_posix_lock_t *new_lock = NULL; + xlator_t *this = NULL; + + + this = THIS; + new_lock = GF_CALLOC (1, sizeof (*new_lock), + gf_client_mt_clnt_lock_t); + if (!new_lock) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + goto out; + } + + INIT_LIST_HEAD (&new_lock->list); + new_lock->fd = fd; + memcpy (&new_lock->user_flock, flock, sizeof (struct flock)); + + new_lock->fl_type = flock->l_type; + new_lock->fl_start = flock->l_start; + + if (flock->l_len == 0) + new_lock->fl_end = LLONG_MAX; + else + new_lock->fl_end = flock->l_start + flock->l_len - 1; + + new_lock->owner = owner; + new_lock->cmd = cmd; /* Not really useful */ + + +out: + return new_lock; +} + +void +client_save_number_fds (clnt_conf_t *conf, int count) +{ + LOCK (&conf->rec_lock); + { + conf->reopen_fd_count = count; + } + UNLOCK (&conf->rec_lock); +} + +int +client_add_lock_for_recovery (fd_t *fd, struct flock *flock, uint64_t owner, + int32_t cmd) +{ + clnt_fd_ctx_t *fdctx = NULL; + xlator_t *this = NULL; + client_posix_lock_t *lock = NULL; + clnt_conf_t *conf = NULL; + + int ret = 0; + + this = THIS; + conf = 
this->private; + + pthread_mutex_lock (&conf->lock); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->lock); + + if (!fdctx) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get fd context. Marking as bad fd."); + ret = -EBADFD; + goto out; + } + + lock = new_client_lock (flock, owner, cmd, fd); + if (!lock) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -ENOMEM; + goto out; + } + + client_setlk (fdctx, lock); + +out: + return ret; + +} + +static int +construct_reserve_unlock (struct flock *lock, call_frame_t *frame, + client_posix_lock_t *client_lock) +{ + GF_ASSERT (lock); + GF_ASSERT (frame); + GF_ASSERT (frame->root->lk_owner); + + lock->l_type = F_UNLCK; + lock->l_start = 0; + lock->l_whence = SEEK_SET; + lock->l_len = 0; /* Whole file */ + lock->l_pid = (uint64_t)frame->root; + + frame->root->lk_owner = client_lock->owner; + + return 0; +} + +static int +construct_reserve_lock (client_posix_lock_t *client_lock, call_frame_t *frame, + struct flock *lock) +{ + GF_ASSERT (client_lock); + + memcpy (lock, &(client_lock->user_flock), sizeof (struct flock)); + + frame->root->lk_owner = client_lock->owner; + + return 0; +} + +uint64_t +decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf) +{ + uint64_t fd_count = 0; + + LOCK (&conf->rec_lock); + { + fd_count = --(conf->reopen_fd_count); + } + UNLOCK (&conf->rec_lock); + + if (fd_count == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "All locks healed on the last fd - notifying CHILDUP"); + client_notify_parents_child_up (this); + } + + return fd_count; +} + +int32_t +client_remove_reserve_lock_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; + + uint64_t fd_count = 0; + + local = frame->local; + conf = this->private; + + if (op_ret < 0) { + /* TODO: critical error describing recovery command + and blanket on ops on fd */ + gf_log 
(this->name, GF_LOG_CRITICAL, + "Lock recovery failed with error msg=%s", + strerror(op_errno)); + goto cleanup; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Removing reserve lock was successful."); + +cleanup: + frame->local = NULL; + client_mark_bad_fd (local->client_lock->fd, + local->fdctx); + destroy_client_lock (local->client_lock); + client_local_wipe (local); + STACK_DESTROY (frame->root); + fd_count = decrement_reopen_fd_count (this, conf); + gf_log (this->name, GF_LOG_DEBUG, + "Need to attempt lock recovery on %lld open fds", + (unsigned long long) fd_count); + return 0; +} + +static void +client_remove_reserve_lock (xlator_t *this, call_frame_t *frame, + client_posix_lock_t *lock) +{ + struct flock unlock; + clnt_local_t *local = NULL; + + local = frame->local; + construct_reserve_unlock (&unlock, frame, lock); + + STACK_WIND (frame, client_remove_reserve_lock_cbk, + this, this->fops->lk, + lock->fd, F_RESLK_UNLCK, &unlock); +} + +static client_posix_lock_t * +get_next_recovery_lock (xlator_t *this, clnt_local_t *local) +{ + client_posix_lock_t *lock = NULL; + + pthread_mutex_lock (&local->mutex); + { + if (list_empty (&local->lock_list)) { + gf_log (this->name, GF_LOG_DEBUG, + "lock-list empty"); + goto unlock; + } + + lock = list_entry ((local->lock_list).next, typeof (*lock), list); + list_del_init (&lock->list); + } +unlock: + pthread_mutex_unlock (&local->mutex); + + return lock; + +} + +int32_t +client_reserve_lock_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; + + uint64_t fd_count = 0; + + local = frame->local; + conf = this->private; + + /* Got the reserve lock. 
Check if lock is grantable and proceed + with the real lk call */ + + if (op_ret >= 0) { + /* Lock is grantable if flock reflects a successful getlk() call*/ + if (lock->l_type == F_UNLCK && lock->l_pid) { + gf_log (this->name, GF_LOG_DEBUG, + "Got the reservelk, but the lock is not grantable. "); + client_remove_reserve_lock (this, frame, local->client_lock); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Reserve Lock succeeded"); + client_send_recovery_lock (frame, this, local->client_lock); + goto out; + } + + /* Somebody else has a reserve lk. Lock conflict detected. + Mark fd as bad */ + + gf_log (this->name, GF_LOG_DEBUG, + "Reservelk OP failed. Aborting lock recovery and marking bad fd"); + + client_mark_bad_fd (local->client_lock->fd, + local->fdctx); + destroy_client_lock (local->client_lock); + frame->local = NULL; + client_local_wipe (local); + STACK_DESTROY (frame->root); + fd_count = decrement_reopen_fd_count (this, conf); + gf_log (this->name, GF_LOG_DEBUG, + "Need to attempt lock recovery on %lld open fds", + (unsigned long long) fd_count); + +out: + return 0; +} + +int32_t +client_recovery_lock_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + clnt_local_t *local = NULL; + clnt_fd_ctx_t *fdctx = NULL; + clnt_conf_t *conf = NULL; + client_posix_lock_t *next_lock = NULL; + + struct flock reserve_flock; + uint64_t fd_count = 0; + + local = frame->local; + conf = this->private; + + if (op_ret < 0) { + /* TODO: critical error describing recovery command + and blanket on ops on fd */ + gf_log (this->name, GF_LOG_CRITICAL, + "Lock recovery failed with error msg=%s", + strerror(op_errno)); + + client_mark_bad_fd (local->client_lock->fd, + local->fdctx); + goto cleanup; + + /* Lock recovered. 
Continue with reserve lock for next lock */ + } else { + gf_log (this->name, GF_LOG_DEBUG, + "lock recovered successfully - Continuing with next lock."); + + next_lock = get_next_recovery_lock (this, local); + if (!next_lock) { + gf_log (this->name, GF_LOG_DEBUG, + "All locks recovered on fd"); + goto cleanup; + } + + construct_reserve_lock (next_lock, frame, &reserve_flock); + local->fdctx = fdctx; + local->client_lock = next_lock; + + STACK_WIND (frame, client_reserve_lock_cbk, + this, this->fops->lk, + next_lock->fd, F_RESLK_LCK, &reserve_flock); + goto out; + + } + +cleanup: + frame->local = NULL; + client_local_wipe (local); + if (local->client_lock) + destroy_client_lock (local->client_lock); + STACK_DESTROY (frame->root); + fd_count = decrement_reopen_fd_count (this, conf); + gf_log (this->name, GF_LOG_DEBUG, + "Need to attempt lock recovery on %lld open fds", + (unsigned long long) fd_count); + +out: + return 0; +} + +static int +client_send_recovery_lock (call_frame_t *frame, xlator_t *this, + client_posix_lock_t *lock) +{ + + frame->root->lk_owner = lock->owner; + + /* Send all locks as F_SETLK to prevent the frame + from blocking if there is a conflict */ + + STACK_WIND (frame, client_recovery_lock_cbk, + this, this->fops->lk, + lock->fd, F_SETLK, + &(lock->user_flock)); + + return 0; +} + +static int +client_lockrec_init (clnt_fd_ctx_t *fdctx, clnt_local_t *local) +{ + + INIT_LIST_HEAD (&local->lock_list); + pthread_mutex_init (&local->mutex, NULL); + + pthread_mutex_lock (&fdctx->mutex); + { + list_splice_init (&fdctx->lock_list, &local->lock_list); + } + pthread_mutex_unlock (&fdctx->mutex); + + return 0; +} + +int +client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ + call_frame_t *frame = NULL; + clnt_local_t *local = NULL; + client_posix_lock_t *lock = NULL; + + struct flock reserve_flock; + int ret = 0; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + 
"Out of memory"); + ret = -ENOMEM; + goto out; + } + + client_lockrec_init (fdctx, local); + + lock = get_next_recovery_lock (this, local); + if (!lock) { + gf_log (this->name, GF_LOG_DEBUG, + "No locks on fd"); + ret = -1; + goto out; + } + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -1; + goto out; + } + + construct_reserve_lock (lock, frame, &reserve_flock); + + local->fdctx = fdctx; + local->client_lock = lock; + + STACK_WIND (frame, client_reserve_lock_cbk, + this, this->fops->lk, + lock->fd, F_RESLK_LCK, &reserve_flock); + +out: + return ret; + + +} + +int32_t +client_dump_locks (char *name, inode_t *inode, + dict_t *dict) +{ + int ret = 0; + char dict_string[256]; + + ret = dump_client_locks (inode); + snprintf (dict_string, 256, "%d locks dumped in log file", ret); + + dict = dict_new (); + if (!dict) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Out of memory"); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "trusted.glusterfs.clientlk-dump", dict_string); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Could not set dict with %s", CLIENT_DUMP_LOCKS); + goto out; + } + +out: + return ret; +} + +int32_t +is_client_dump_locks_cmd (char *name) +{ + int ret = 0; + + if (strcmp (name, CLIENT_DUMP_LOCKS) == 0) + ret = 1; + + return ret; +} diff --git a/xlators/protocol/client/src/client-mem-types.h b/xlators/protocol/client/src/client-mem-types.h index 638e537d116..50015b18fd1 100644 --- a/xlators/protocol/client/src/client-mem-types.h +++ b/xlators/protocol/client/src/client-mem-types.h @@ -28,6 +28,7 @@ enum gf_client_mem_types_ { gf_client_mt_clnt_local_t, gf_client_mt_clnt_req_buf_t, gf_client_mt_clnt_fdctx_t, + gf_client_mt_clnt_lock_t, gf_client_mt_end, }; #endif /* __CLIENT_MEM_TYPES_H__ */ diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 726a9dad792..b703b88f4b7 100644 --- a/xlators/protocol/client/src/client.c +++ 
b/xlators/protocol/client/src/client.c @@ -1865,6 +1865,8 @@ init (xlator_t *this) pthread_mutex_init (&conf->lock, NULL); INIT_LIST_HEAD (&conf->saved_fds); + LOCK_INIT (&conf->rec_lock); + this->private = conf; /* If it returns -1, then its a failure, if it returns +1 we need diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 84940de5f84..a31873a027b 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -33,7 +33,7 @@ /* FIXME: Needs to be defined in a common file */ #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect" #define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect" - +#define CLIENT_DUMP_LOCKS "trusted.glusterfs.clientlk-dump" struct clnt_options { char *remote_subvolume; int ping_timeout; @@ -54,6 +54,10 @@ typedef struct clnt_conf { rpc_clnt_prog_t *mgmt; rpc_clnt_prog_t *handshake; rpc_clnt_prog_t *dump; + + uint64_t reopen_fd_count; /* Count of fds reopened after a + connection is established */ + gf_lock_t rec_lock; } clnt_conf_t; typedef struct _client_fd_ctx { @@ -68,8 +72,24 @@ typedef struct _client_fd_ctx { char released; int32_t flags; int32_t wbflags; + + pthread_mutex_t mutex; + struct list_head lock_list; /* List of all granted locks on this fd */ } clnt_fd_ctx_t; +typedef struct _client_posix_lock { + fd_t *fd; /* The fd on which the lk operation was made */ + + struct flock user_flock; /* the flock supplied by the user */ + off_t fl_start; + off_t fl_end; + short fl_type; + int32_t cmd; /* the cmd for the lock call */ + uint64_t owner; /* lock owner from fuse */ + + struct list_head list; /* reference used to add to the fdctx list of locks */ +} client_posix_lock_t; + typedef struct client_local { loc_t loc; loc_t loc2; @@ -79,6 +99,12 @@ typedef struct client_local { uint32_t wbflags; struct iobref *iobref; fop_cbk_fn_t op; + + client_posix_lock_t *client_lock; + uint64_t owner; + int32_t cmd; + struct list_head lock_list; + 
pthread_mutex_t mutex; } clnt_local_t; typedef struct client_args { @@ -138,6 +164,17 @@ int unserialize_rsp_direntp (struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries int clnt_readdir_rsp_cleanup (gfs3_readdir_rsp *rsp); int clnt_readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp); - - +int client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx); +int32_t delete_granted_locks_owner (fd_t *fd, uint64_t owner); +int client_add_lock_for_recovery (fd_t *fd, struct flock *flock, uint64_t owner, + int32_t cmd); +uint64_t decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf); +int32_t delete_granted_locks_fd (clnt_fd_ctx_t *fdctx); +int32_t client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd); +void client_save_number_fds (clnt_conf_t *conf, int count); +int dump_client_locks (inode_t *inode); +int client_notify_parents_child_up (xlator_t *this); +int32_t is_client_dump_locks_cmd (char *name); +int32_t client_dump_locks (char *name, inode_t *inode, + dict_t *dict); #endif /* !_CLIENT_H */ diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c index 69f2646978d..c3add8fd3ba 100644 --- a/xlators/protocol/client/src/client3_1-fops.c +++ b/xlators/protocol/client/src/client3_1-fops.c @@ -313,6 +313,7 @@ client3_1_open_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->wbflags = local->wbflags; INIT_LIST_HEAD (&fdctx->sfd_pos); + INIT_LIST_HEAD (&fdctx->lock_list); this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -605,10 +606,14 @@ client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { call_frame_t *frame = NULL; + clnt_local_t *local = NULL; + xlator_t *this = NULL; gf_common_rsp rsp = {0,}; int ret = 0; frame = myframe; + this = THIS; + local = frame->local; if (-1 == req->rpc_status) { rsp.op_ret = -1; @@ -623,6 +628,18 @@ client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count, goto out; } + if (rsp.op_ret >= 0) { + /* Delete all saved 
locks of the owner issuing flush */ + gf_log (this->name, GF_LOG_DEBUG, + "Attempting to delete locks of owner=%llu", + (long long unsigned) local->owner); + delete_granted_locks_owner (local->fd, local->owner); + } + + frame->local = NULL; + if (local) + client_local_wipe (local); + out: STACK_UNWIND_STRICT (flush, frame, rsp.op_ret, gf_error_to_errno (rsp.op_errno)); @@ -1442,6 +1459,7 @@ client3_1_create_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->flags = local->flags; INIT_LIST_HEAD (&fdctx->sfd_pos); + INIT_LIST_HEAD (&fdctx->lock_list); this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -1506,12 +1524,14 @@ int client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { - call_frame_t *frame = NULL; - struct flock lock = {0,}; - gfs3_lk_rsp rsp = {0,}; - int ret = 0; + call_frame_t *frame = NULL; + clnt_local_t *local = NULL; + struct flock lock = {0,}; + gfs3_lk_rsp rsp = {0,}; + int ret = 0; frame = myframe; + local = frame->local; if (-1 == req->rpc_status) { rsp.op_ret = -1; @@ -1531,6 +1551,20 @@ client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count, gf_flock_to_flock (&rsp.flock, &lock); } + /* Save the lock to the client lock cache to be able + to recover in the case of server reboot.*/ + if (local->cmd == F_SETLK || local->cmd == F_SETLKW) { + ret = client_add_lock_for_recovery (local->fd, &lock, + local->owner, local->cmd); + if (ret < 0) { + rsp.op_ret = -1; + rsp.op_errno = -ret; + } + } + + frame->local = NULL; + client_local_wipe (local); + out: STACK_UNWIND_STRICT (lk, frame, rsp.op_ret, gf_error_to_errno (rsp.op_errno), &lock); @@ -1777,6 +1811,7 @@ client3_1_opendir_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->is_dir = 1; INIT_LIST_HEAD (&fdctx->sfd_pos); + INIT_LIST_HEAD (&fdctx->lock_list); this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -2014,12 +2049,14 @@ int client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) 
{ - int32_t ret = -1; - gfs3_open_rsp rsp = {0,}; - clnt_local_t *local = NULL; - clnt_conf_t *conf = NULL; - clnt_fd_ctx_t *fdctx = NULL; - call_frame_t *frame = NULL; + int32_t ret = -1; + gfs3_open_rsp rsp = {0,}; + int attempt_lock_recovery = _gf_false; + uint64_t fd_count = 0; + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; + clnt_fd_ctx_t *fdctx = NULL; + call_frame_t *frame = NULL; frame = myframe; local = frame->local; @@ -2052,6 +2089,7 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, if (!fdctx->released) { list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + attempt_lock_recovery = _gf_true; fdctx = NULL; } } @@ -2060,6 +2098,20 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, } } + if (attempt_lock_recovery) { + ret = client_attempt_lock_recovery (frame->this, local->fdctx); + if (ret < 0) + gf_log (frame->this->name, GF_LOG_DEBUG, + "No locks on fd to recover"); + else { + fd_count = decrement_reopen_fd_count (frame->this, conf); + gf_log (frame->this->name, GF_LOG_DEBUG, + "Need to attempt lock recovery on %lld open fds", + (unsigned long long) fd_count); + + } + } + out: if (fdctx) client_fdctx_destroy (frame->this, fdctx); @@ -2380,6 +2432,9 @@ client3_1_release (call_frame_t *frame, xlator_t *this, if (remote_fd != -1) { req.fd = remote_fd; req.gfs_id = GFS3_OP_RELEASE; + + delete_granted_locks_fd (fdctx); + ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_RELEASE, client3_1_release_cbk, NULL, @@ -3468,7 +3523,6 @@ unwind: } - int32_t client3_1_flush (call_frame_t *frame, xlator_t *this, void *data) @@ -3477,8 +3531,9 @@ client3_1_flush (call_frame_t *frame, xlator_t *this, gfs3_flush_req req = {0,}; clnt_fd_ctx_t *fdctx = NULL; clnt_conf_t *conf = NULL; + clnt_local_t *local = NULL; int op_errno = ESTALE; - int ret = 0; + int ret = 0; if (!frame || !this || !data) goto unwind; @@ -3507,6 +3562,21 @@ client3_1_flush (call_frame_t *frame, xlator_t *this, goto unwind; 
} + conf = this->private; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "Out of Memory"); + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + + } + + local->fd = fd_ref (args->fd); + local->owner = frame->root->lk_owner; + frame->local = local; + req.fd = fdctx->remote_fd; req.gfs_id = GFS3_OP_FLUSH; @@ -4012,16 +4082,23 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this, clnt_conf_t *conf = NULL; clnt_args_t *args = NULL; gfs3_getxattr_req req = {0,}; + dict_t *dict = NULL; int ret = 0; + int32_t op_ret = 0; int op_errno = ESTALE; - if (!frame || !this || !data) + if (!frame || !this || !data) { + op_ret = -1; + op_errno = 0; goto unwind; - + } args = data; - if (!(args->loc && args->loc->inode)) + if (!(args->loc && args->loc->inode)) { + op_ret = -1; + op_errno = EINVAL; goto unwind; + } memcpy (req.gfid, args->loc->inode->gfid, 16); req.namelen = 1; /* Use it as a flag */ @@ -4035,19 +4112,42 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this, conf = this->private; + if (args && args->name) { + if (is_client_dump_locks_cmd ((char *)args->name)) { + ret = client_dump_locks ((char *)args->name, + args->loc->inode, + dict); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Client dump locks failed"); + op_ret = -1; + op_errno = EINVAL; + } + + GF_ASSERT (dict); + op_ret = 0; + op_errno = 0; + goto unwind; + } + } + ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_GETXATTR, client3_1_getxattr_cbk, NULL, xdr_from_getxattr_req, NULL, 0, NULL, 0, NULL); if (ret) { + op_ret = -1; op_errno = ENOTCONN; goto unwind; } return 0; unwind: - STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL); + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, NULL); + if (dict) + dict_unref (dict); + return 0; } @@ -4242,20 +4342,28 @@ int32_t client3_1_lk (call_frame_t *frame, xlator_t *this, void *data) { - clnt_args_t *args = NULL; - gfs3_lk_req req = {0,}; - 
int32_t gf_cmd = 0; - int32_t gf_type = 0; - clnt_fd_ctx_t *fdctx = NULL; - clnt_conf_t *conf = NULL; - int op_errno = ESTALE; - int ret = 0; + clnt_args_t *args = NULL; + gfs3_lk_req req = {0,}; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + clnt_fd_ctx_t *fdctx = NULL; + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; + int op_errno = ESTALE; + int ret = 0; if (!frame || !this || !data) goto unwind; args = data; conf = this->private; + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + op_errno = ENOMEM; + goto unwind; + } pthread_mutex_lock (&conf->lock); { @@ -4278,16 +4386,11 @@ client3_1_lk (call_frame_t *frame, xlator_t *this, goto unwind; } - if (args->cmd == F_GETLK || args->cmd == F_GETLK64) - gf_cmd = GF_LK_GETLK; - else if (args->cmd == F_SETLK || args->cmd == F_SETLK64) - gf_cmd = GF_LK_SETLK; - else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64) - gf_cmd = GF_LK_SETLKW; - else { + ret = client_cmd_to_gf_cmd (args->cmd, &gf_cmd); + if (ret) { + op_errno = EINVAL; gf_log (this->name, GF_LOG_DEBUG, "Unknown cmd (%d)!", gf_cmd); - goto unwind; } switch (args->flock->l_type) { @@ -4302,6 +4405,11 @@ client3_1_lk (call_frame_t *frame, xlator_t *this, break; } + local->owner = frame->root->lk_owner; + local->cmd = args->cmd; + local->fd = fd_ref (args->fd); + frame->local = local; + req.fd = fdctx->remote_fd; req.cmd = gf_cmd; req.type = gf_type; |