diff options
author | Pavan Sondur <pavan@gluster.com> | 2010-09-30 02:25:31 +0000 |
---|---|---|
committer | Vijay Bellur <vijay@dev.gluster.com> | 2010-09-30 11:19:24 -0700 |
commit | af18c636c44b1ea56296850e55afe0e4b2ce845c (patch) | |
tree | 40f8470ec000b96d61b3f8d53286aa0812c9d921 | |
parent | 760daf28898cbb8b5072551735bebee16450ba08 (diff) |
protocol/client: cluster/afr: Support lock recovery and self heal.
Signed-off-by: Pavan Vilas Sondur <pavan@gluster.com>
Signed-off-by: Vijay Bellur <vijay@dev.gluster.com>
BUG: 865 (Add locks recovery support in GlusterFS)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=865
23 files changed, 2120 insertions, 74 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index f107a8abfa4..9b5a0e99239 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -150,15 +150,28 @@ typedef enum { GF_LK_GETLK = 0, GF_LK_SETLK, GF_LK_SETLKW, + GF_LK_RESLK_LCK, + GF_LK_RESLK_LCKW, + GF_LK_RESLK_UNLCK, } glusterfs_lk_cmds_t; typedef enum { GF_LK_F_RDLCK = 0, GF_LK_F_WRLCK, - GF_LK_F_UNLCK + GF_LK_F_UNLCK, + GF_LK_RECLK, } glusterfs_lk_types_t; +typedef enum { + F_RESLK_LCK = 200, + F_RESLK_LCKW, + F_RESLK_UNLCK, +} glusterfs_lk_recovery_cmds_t; + +typedef enum { + F_GETLK_FD = 250, +} glusterfs_lk_rec_types_t; typedef enum { GF_LOCK_POSIX, diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 64e22c3229e..9d9f5d0414d 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1197,6 +1197,16 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd) fd_ctx->up_count = priv->up_count; fd_ctx->down_count = priv->down_count; + fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on), + priv->child_count, + gf_afr_mt_char); + if (!fd_ctx->locked_on) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -ENOMEM; + goto unlock; + } + ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx); INIT_LIST_HEAD (&fd_ctx->entries); @@ -1426,6 +1436,9 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) if (fd_ctx->opened_on) GF_FREE (fd_ctx->opened_on); + if (fd_ctx->locked_on) + GF_FREE (fd_ctx->locked_on); + GF_FREE (fd_ctx); } @@ -2298,8 +2311,9 @@ int32_t afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct flock *lock) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int ret = 0; int child_index = -1; @@ -2339,7 +2353,18 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } else { /* locking has succeeded on all nodes that are up */ - 
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, + ret = afr_mark_locked_nodes (this, local->fd, + local->cont.lk.locked_nodes); + if (ret) + gf_log (this->name, GF_LOG_DEBUG, + "Could not save locked nodes info in fdctx"); + + ret = afr_save_locked_fd (this, local->fd); + if (ret) + gf_log (this->name, GF_LOG_DEBUG, + "Could not save locked fd"); + + AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, &local->cont.lk.ret_flock); } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index ee53d1d7bfb..de95a6c763b 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1678,3 +1678,104 @@ afr_unlock (call_frame_t *frame, xlator_t *this) return 0; } +/* Copy locked_nodes (priv->child_count bytes) into the fd ctx's locked_on array. Returns the negative result of afr_fd_ctx_set if that fails, otherwise the result of fd_ctx_get. */ +int +afr_mark_locked_nodes (xlator_t *this, fd_t *fd, + unsigned char *locked_nodes) +{ + afr_private_t *priv = NULL; + afr_fd_ctx_t *fdctx = NULL; + uint64_t tmp = 0; + int ret = 0; + + priv = this->private; + + ret = afr_fd_ctx_set (this, fd); /* keep the result, otherwise the check below can never fire */ + if (ret < 0) + goto out; + + ret = fd_ctx_get (fd, this, &tmp); + fdctx = (afr_fd_ctx_t *) (long) tmp; + + GF_ASSERT (fdctx->locked_on); + + memcpy (fdctx->locked_on, locked_nodes, + priv->child_count); + +out: + return ret; +} + +static int +__is_fd_saved (xlator_t *this, fd_t *fd) +{ + afr_locked_fd_t *locked_fd = NULL; + afr_private_t *priv = NULL; + int found = 0; + + priv = this->private; + + list_for_each_entry (locked_fd, &priv->saved_fds, list) { + if (locked_fd->fd == fd) { + found = 1; + break; + } + } + + return found; +} +/* Append an entry for fd to priv->saved_fds. Returns 0 on success, -1 if the entry cannot be allocated. */ +static int +__afr_save_locked_fd (xlator_t *this, fd_t *fd) +{ + afr_private_t *priv = NULL; + afr_locked_fd_t *locked_fd = NULL; + int ret = 0; + + priv = this->private; + + locked_fd = GF_CALLOC (1, sizeof (*locked_fd), + gf_afr_mt_locked_fd); + if (!locked_fd) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -1; + goto out; + } + + INIT_LIST_HEAD (&locked_fd->list); + locked_fd->fd = fd; /* without this __is_fd_saved can never match the entry */ + list_add_tail (&locked_fd->list, &priv->saved_fds); 
+ +out: + return ret; +} +/* Record fd in priv->saved_fds unless it is already there; the check and the insert both run under priv->mutex. Returns 0 on success or if already saved, non-zero if the entry could not be added. */ +int +afr_save_locked_fd (xlator_t *this, fd_t *fd) +{ + afr_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + + pthread_mutex_lock (&priv->mutex); + { + if (__is_fd_saved (this, fd)) { + gf_log (this->name, GF_LOG_DEBUG, + "fd=%p already saved", fd); + goto unlock; + } + + ret = __afr_save_locked_fd (this, fd); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "fd=%p could not be saved", fd); /* %p needs its argument */ + goto unlock; + } + } +unlock: + pthread_mutex_unlock (&priv->mutex); + + return ret; +} diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index c1a86b8275b..31a80e8f57a 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -41,6 +41,7 @@ enum gf_afr_mem_types_ { gf_afr_mt_loc_t, gf_afr_mt_entry_name, gf_afr_mt_pump_priv, + gf_afr_mt_locked_fd, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 09094bdbe58..69b281d973e 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -641,6 +641,9 @@ init (xlator_t *this) priv->first_lookup = 1; priv->root_inode = NULL; + pthread_mutex_init (&priv->mutex, NULL); + INIT_LIST_HEAD (&priv->saved_fds); + ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 26f2c989f4e..68b4a1e305e 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -85,6 +85,9 @@ typedef struct _afr_private { struct _pump_private *pump_private; /* Set if we are loaded as pump */ int use_afr_in_pump; + + pthread_mutex_t mutex; + struct list_head saved_fds; /* list of fds on which locks have succeeded */ } afr_private_t; typedef struct { @@ -262,6 +265,11 @@ typedef struct { } afr_internal_lock_t; +typedef struct _afr_locked_fd { + fd_t *fd; + struct list_head list; +} afr_locked_fd_t; + typedef struct _afr_local { unsigned int call_count; unsigned int success_count; @@ -619,6 
+627,8 @@ typedef struct { int hit, miss; gf_boolean_t failed_over; struct list_head entries; /* needed for readdir failover */ + + unsigned char *locked_on; /* which subvolumes locks have been successful */ } afr_fd_ctx_t; @@ -655,6 +665,13 @@ int32_t afr_notify (xlator_t *this, int32_t event, void *data, ...); +int +afr_save_locked_fd (xlator_t *this, fd_t *fd); + +int +afr_mark_locked_nodes (xlator_t *this, fd_t *fd, + unsigned char *locked_nodes); + void afr_set_lk_owner (call_frame_t *frame, xlator_t *this); diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am index d10b874befb..53dd3aa5da5 100644 --- a/xlators/features/locks/src/Makefile.am +++ b/xlators/features/locks/src/Makefile.am @@ -3,7 +3,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features locks_la_LDFLAGS = -module -avoidversion -locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c +locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = locks.h common.h locks-mem-types.h diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index b34cd97813a..9f2d11304df 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -103,6 +103,12 @@ fd_to_fdnum (fd_t *fd) return ((unsigned long) fd); } +fd_t * +fd_from_fdnum (posix_lock_t *lock) +{ + return ((fd_t *) lock->fd_num); +} + int __pl_inode_is_empty (pl_inode_t *pl_inode) { @@ -441,6 +447,9 @@ pl_inode_get (xlator_t *this, inode_t *inode) INIT_LIST_HEAD (&pl_inode->dom_list); INIT_LIST_HEAD (&pl_inode->ext_list); INIT_LIST_HEAD (&pl_inode->rw_list); + INIT_LIST_HEAD (&pl_inode->reservelk_list); + INIT_LIST_HEAD (&pl_inode->blocked_reservelks); + INIT_LIST_HEAD (&pl_inode->blocked_calls); inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 
d707294475f..c7d817f8da0 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -116,4 +116,18 @@ pl_trace_release (xlator_t *this, fd_t *fd); unsigned long fd_to_fdnum (fd_t *fd); +fd_t * +fd_from_fdnum (posix_lock_t *lock); + +int +pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block); +int +reservelks_equal (posix_lock_t *l1, posix_lock_t *l2); + +int +pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock, int can_block); +int +pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock); #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h index cf50240863b..f441f95cfa9 100644 --- a/xlators/features/locks/src/locks-mem-types.h +++ b/xlators/features/locks/src/locks-mem-types.h @@ -33,6 +33,7 @@ enum gf_locks_mem_types_ { gf_locks_mt_pl_rw_req_t, gf_locks_mt_posix_locks_private_t, gf_locks_mt_pl_local_t, + gf_locks_mt_pl_fdctx_t, gf_locks_mt_end }; #endif diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 60474615e5f..483e3e6669e 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -125,6 +125,9 @@ struct __pl_inode { struct list_head dom_list; /* list of domains */ struct list_head ext_list; /* list of fcntl locks */ struct list_head rw_list; /* list of waiting r/w requests */ + struct list_head reservelk_list; /* list of reservelks */ + struct list_head blocked_reservelks; /* list of blocked reservelks */ + struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/ int mandatory; /* if mandatory locking is enabled */ inode_t *refkeeper; /* hold refs on an inode while locks are @@ -150,4 +153,8 @@ typedef struct { gf_boolean_t posixlk_count_req; } pl_local_t; +typedef struct { + struct list_head locks_list; +} pl_fdctx_t; + #endif /* __POSIX_LOCKS_H__ */ 
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 165280265ad..f085594243c 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -741,6 +741,194 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } +static int +__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd) +{ + int found = 0; + posix_lock_t *l = NULL; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + found = 1; + break; + } + } + + return found; +} + +static posix_lock_t * +lock_dup (posix_lock_t *lock) +{ + posix_lock_t *new_lock = NULL; + + new_lock = new_posix_lock (&lock->user_flock, lock->transport, + lock->client_pid, lock->owner, + (fd_t *)lock->fd_num); + return new_lock; +} + +/* Duplicate every lock on pl_inode->ext_list that belongs to fd onto fdctx->locks_list. fdctx must already be allocated and initialised by the caller. Returns 0, or -1 if a duplicate could not be created (locks copied so far stay on the list). */ +static int +__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, + pl_fdctx_t *fdctx) +{ + posix_lock_t *l = NULL; + posix_lock_t *duplock = NULL; + int ret = 0; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + duplock = lock_dup (l); + if (!duplock) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Out of memory"); + ret = -1; + break; + } + + list_add_tail (&duplock->list, &fdctx->locks_list); + } + } + + return ret; +} + +/* Allocate a pl_fdctx_t, fill it with copies of the locks held on fd, store it in the fd's context and hand it back through *fdctx. On any failure nothing is stored, *fdctx is left untouched and a non-zero value is returned. */ +static int +__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, + pl_fdctx_t **fdctx) +{ + pl_fdctx_t *new_ctx = NULL; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + int ret = 0; + + new_ctx = GF_CALLOC (1, sizeof (*new_ctx), + gf_locks_mt_pl_fdctx_t); + if (!new_ctx) { + ret = -1; + goto out; + } + + INIT_LIST_HEAD (&new_ctx->locks_list); + + ret = __dup_locks_to_fdctx (pl_inode, fd, new_ctx); + if (ret) + goto out; + + /* store the heap context itself, not the address of a local variable */ + ret = fd_ctx_set (fd, THIS, (uint64_t) (unsigned long) new_ctx); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Failed to set fdctx"); + goto out; + } + + *fdctx = new_ctx; + new_ctx = NULL; /* ownership moved to the fd context */ +out: + if (new_ctx) { + /* failure path: drop the partial copy instead of leaking it */ + list_for_each_entry_safe (l, tmp, &new_ctx->locks_list, list) { + list_del_init (&l->list); + __destroy_lock (l); + } + GF_FREE (new_ctx); + } + return ret; + +} + +static void +pl_mark_eol_lock (posix_lock_t *lock) +{ + lock->user_flock.l_type = GF_LK_RECLK; + return; +} + +/* Detach and return the first lock queued on fdctx->locks_list, or NULL if the list is empty. */ +static posix_lock_t * +__get_next_fdctx_lock (pl_fdctx_t *fdctx) +{ + posix_lock_t *lock = NULL; + + GF_ASSERT 
(fdctx); + + if (list_empty (&fdctx->locks_list)) { + gf_log (THIS->name, GF_LOG_DEBUG, + "fdctx lock list empty"); + goto out; + } + + /* first element, not the list head itself */ + lock = list_entry (fdctx->locks_list.next, typeof (*lock), + list); + + GF_ASSERT (lock); + + list_del_init (&lock->list); + +out: + return lock; +} + +static int +__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock) +{ + posix_lock_t *lock = NULL; + int ret = 0; + + GF_ASSERT (fdctx); + + lock = __get_next_fdctx_lock (fdctx); + if (!lock) { + gf_log (THIS->name, GF_LOG_DEBUG, + "marking EOL in reqlock"); + pl_mark_eol_lock (reqlock); + goto out; + } + + reqlock->user_flock = lock->user_flock; + +out: + if (lock) + __destroy_lock (lock); + + return ret; +} +/* Put the next not-yet-reported lock held on fd into reqlock->user_flock. On the first call for an fd the locks are copied into a per-fd context; when that copy is exhausted reqlock is marked with GF_LK_RECLK. Returns 0, or non-zero if the copy could not be made. */ +static int +pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode, + fd_t *fd, posix_lock_t *reqlock) +{ + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + int ret = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (!__fd_has_locks (pl_inode, fd)) { + gf_log (this->name, GF_LOG_DEBUG, + "fd=%p has no active locks", fd); + ret = 0; + goto unlock; + } + + gf_log (this->name, GF_LOG_DEBUG, + "There are active locks on fd"); + + ret = fd_ctx_get (fd, this, &tmp); + fdctx = (pl_fdctx_t *) tmp; + if (ret) { + gf_log (this->name, GF_LOG_TRACE, + "no fdctx -> copying all locks on fd"); + + /* pass the address so the freshly built context comes back to us */ + ret = __copy_locks_to_fdctx (pl_inode, fd, &fdctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + goto unlock; + } + + ret = __set_next_lock_fd (fdctx, reqlock); + + } else { + gf_log (this->name, GF_LOG_TRACE, + "fdctx present -> returning the next lock"); + ret = __set_next_lock_fd (fdctx, reqlock); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "could not get next lock of fd"); + goto unlock; + } + } + } + +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + return ret; + +} int pl_lk (call_frame_t *frame, xlator_t *this, @@ -791,6 +979,68 @@ pl_lk (call_frame_t *frame, xlator_t *this, switch (cmd) { + case F_RESLK_LCKW: + can_block = 1; + + /* fall through */ + 
case F_RESLK_LCK: + memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); + reqlock->frame = frame; + reqlock->this = this; + + ret = pl_reserve_setlk (this, pl_inode, reqlock, + can_block); + if (ret < 0) { + if (can_block) + goto out; + + op_ret = -1; + op_errno = -ret; + __destroy_lock (reqlock); + goto unwind; + } + /* Finally a getlk and return the call */ + conf = pl_getlk (pl_inode, reqlock); + if (conf) + posix_lock_to_flock (conf, flock); + break; + + case F_RESLK_UNLCK: + reqlock->frame = frame; + reqlock->this = this; + ret = pl_reserve_unlock (this, pl_inode, reqlock); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + } + __destroy_lock (reqlock); + goto unwind; + + break; + + case F_GETLK_FD: + reqlock->frame = frame; + reqlock->this = this; + ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); + GF_ASSERT (ret >= 0); + + ret = pl_getlk_fd (this, pl_inode, fd, reqlock); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "getting locks on fd failed"); + op_ret = -1; + op_errno = ENOLCK; + goto unwind; + } + + gf_log (this->name, GF_LOG_TRACE, + "Replying with a lock on fd for healing"); + + posix_lock_to_flock (reqlock, flock); + __destroy_lock (reqlock); + + break; + #if F_GETLK != F_GETLK64 case F_GETLK64: #endif @@ -816,6 +1066,12 @@ pl_lk (call_frame_t *frame, xlator_t *this, #endif case F_SETLK: memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); + ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); + if (ret < 0) { + gf_log (this->name, GF_LOG_TRACE, + "Lock blocked due to conflicting reserve lock"); + goto out; + } ret = pl_setlk (this, pl_inode, reqlock, can_block); diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c new file mode 100644 index 00000000000..c36484c46cc --- /dev/null +++ b/xlators/features/locks/src/reservelk.c @@ -0,0 +1,450 @@ +/* + Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +void +__delete_reserve_lock (posix_lock_t *lock) +{ + list_del (&lock->list); +} + +void +__destroy_reserve_lock (posix_lock_t *lock) +{ + FREE (lock); +} + +/* Return true if the two reservelks have exactly same lock boundaries */ +int +reservelks_equal (posix_lock_t *l1, posix_lock_t *l2) +{ + if ((l1->fl_start == l2->fl_start) && + (l1->fl_end == l2->fl_end)) + return 1; + + return 0; +} + +/* Determine if lock is grantable or not */ +static posix_lock_t * +__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + xlator_t *this = NULL; + posix_lock_t *l = NULL; + posix_lock_t *ret_lock = NULL; + + this = THIS; + + if (list_empty (&pl_inode->reservelk_list)) { + gf_log (this->name, GF_LOG_TRACE, + "No reservelks in list"); + goto out; + } + list_for_each_entry (l, &pl_inode->reservelk_list, list){ + if (reservelks_equal (lock, l)) { + ret_lock = l; + break; + } + } +out: + return ret_lock; +} + +static int +__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2) +{ + return ((l1->owner == l2->owner)); + +} + +static posix_lock_t * +__matching_reservelk 
(pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *l = NULL; + + if (list_empty (&pl_inode->reservelk_list)) { + gf_log ("posix-locks", GF_LOG_TRACE, + "reservelk list empty"); + return NULL; + } + + list_for_each_entry (l, &pl_inode->reservelk_list, list) { + if (reservelks_equal (l, lock)) { + gf_log ("posix-locks", GF_LOG_TRACE, + "equal reservelk found"); + return l; + } + } + + /* after a full pass the cursor is the container of the list head, not a lock, so report "no match" explicitly */ + return NULL; +} + +static int +__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock) +{ + posix_lock_t *conf = NULL; + int ret = 0; + + conf = __matching_reservelk (pl_inode, lock); + if (conf) { + gf_log (this->name, GF_LOG_TRACE, + "Matching reservelk found"); + if (__same_owner_reservelk (lock, conf)) { + list_del_init (&conf->list); + gf_log (this->name, GF_LOG_TRACE, + "Removing the matching reservelk for setlk to progress"); + FREE (conf); + ret = 0; + } else { + gf_log (this->name, GF_LOG_TRACE, + "Conflicting reservelk found"); + ret = 1; + } + + } + return ret; + +} + +int +pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock, int can_block) +{ + int ret = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (__reservelk_conflict (this, pl_inode, lock)) { + gf_log (this->name, GF_LOG_TRACE, + "Found conflicting reservelk. Blocking until reservelk is unlocked."); + lock->blocked = can_block; + list_add_tail (&lock->list, &pl_inode->blocked_calls); + ret = -1; + goto unlock; + } + + gf_log (this->name, GF_LOG_TRACE, + "no conflicting reservelk found. Call continuing"); + ret = 0; + + } +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + return ret; + +} + + +/* Determines if lock can be granted and adds the lock. If the lock + * is blocking, adds it to the blocked_reservelks. 
+ */ +static int +__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + posix_lock_t *conf = NULL; + int ret = -EINVAL; + + conf = __reservelk_grantable (pl_inode, lock); + if (conf){ + ret = -EAGAIN; + if (can_block == 0) + goto out; + + list_add_tail (&lock->list, &pl_inode->blocked_reservelks); + + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->owner, + lock->user_flock.l_start, + lock->user_flock.l_len); + + + goto out; + } + + list_add (&lock->list, &pl_inode->reservelk_list); + + ret = 0; + +out: + return ret; +} + +static posix_lock_t * +find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode) +{ + posix_lock_t *l = NULL; + list_for_each_entry (l, &pl_inode->reservelk_list, list) { + if (reservelks_equal (l, lock)) + return l; + } + return NULL; +} + +/* Set F_UNLCK removes a lock which has the exact same lock boundaries + * as the UNLCK lock specifies. 
If such a lock is not found, returns invalid + */ +static posix_lock_t * +__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode) +{ + + posix_lock_t *conf = NULL; + + conf = find_matching_reservelk (lock, pl_inode); + if (!conf) { + gf_log (this->name, GF_LOG_DEBUG, + " Matching lock not found for unlock"); + goto out; + } + __delete_reserve_lock (conf); + gf_log (this->name, GF_LOG_DEBUG, + " Matching lock found for unlock"); + +out: + return conf; + + +} + +static void +__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) +{ + int bl_ret = 0; + posix_lock_t *bl = NULL; + posix_lock_t *tmp = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + + list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + + list_del_init (&bl->list); + + bl_ret = __lock_reservelk (this, pl_inode, bl, 1); + + if (bl_ret == 0) { + list_add (&bl->list, granted); + } + } + return; +} + +/* Grant all reservelks blocked on lock(s) */ +void +grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode) +{ + struct list_head granted; + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; + + INIT_LIST_HEAD (&granted); + + if (list_empty (&pl_inode->blocked_reservelks)) { + gf_log (this->name, GF_LOG_TRACE, + "No blocked locks to be granted"); + return; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_reserve_locks (this, pl_inode, &granted); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, list) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, + lock->owner, + lock->user_flock.l_start, + lock->user_flock.l_len); + + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock); + } + +} + +/* Move every call parked on pl_inode->blocked_calls that no longer conflicts with a reservelk onto granted; calls that still conflict are queued on blocked_calls again. The caller holds pl_inode->mutex. */ +static void +__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) +{ + posix_lock_t *bl = NULL; + posix_lock_t *tmp = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + /* blocked lock calls are parked on blocked_calls by pl_verify_reservelk, not on blocked_reservelks */ + list_splice_init (&pl_inode->blocked_calls, &blocked_list); + + list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + + list_del_init (&bl->list); + + /* use the unlocked check: pl_verify_reservelk would lock pl_inode->mutex again while the caller holds it (NOTE(review): a deadlock unless that mutex is recursive - confirm) */ + if (__reservelk_conflict (this, pl_inode, bl)) { + list_add_tail (&bl->list, &pl_inode->blocked_calls); + continue; + } + + list_add_tail (&bl->list, granted); + } + return; +} + +/* Retry, through pl_setlk, the lock calls that were parked on blocked_calls while a conflicting reservelk was held. Non-blocking calls that still cannot be granted are unwound with EAGAIN. */ +void +grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode) +{ + struct list_head granted; + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; + fd_t *fd = NULL; + + int can_block = 0; + int32_t cmd = 0; + int ret = 0; + + /* the list head must be valid before anything is added to or iterated from it */ + INIT_LIST_HEAD (&granted); + + if (list_empty (&pl_inode->blocked_calls)) { + gf_log (this->name, GF_LOG_TRACE, + "No blocked lock calls to be granted"); + return; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_lock_calls (this, pl_inode, &granted); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, list) { + /* unlink from the stack-local list before the lock is handed on or destroyed */ + list_del_init (&lock->list); + + fd = fd_from_fdnum (lock); + + if (lock->blocked) { + can_block = 1; + cmd = F_SETLKW; + } + else { + /* reset per lock so an earlier blocking entry does not leak into this one */ + can_block = 0; + cmd = F_SETLK; + } + + lock->blocked = 0; + ret = pl_setlk (this, pl_inode, lock, can_block); + if (ret == -1) { + if (can_block) { + pl_trace_block (this, lock->frame, fd, NULL, + cmd, &lock->user_flock, NULL); + continue; + } else { + gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); + pl_trace_out (this, lock->frame, fd, NULL, cmd, + &lock->user_flock, -1, EAGAIN, NULL); + pl_update_refkeeper (this, fd->inode); + STACK_UNWIND_STRICT (lk, lock->frame, -1, EAGAIN, &lock->user_flock); + __destroy_lock (lock); + } + } + + } + +} + + +int 
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *retlock = NULL; + int ret = -1; + + pthread_mutex_lock (&pl_inode->mutex); + { + retlock = __reserve_unlock_lock (this, lock, pl_inode); + if (!retlock) { + gf_log (this->name, GF_LOG_DEBUG, + "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; + } + + gf_log (this->name, GF_LOG_TRACE, + "Reservelk Unlock successful"); + __destroy_reserve_lock (retlock); + ret = 0; + } +out: + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_reserve_locks (this, pl_inode); + grant_blocked_lock_calls (this, pl_inode); + + return ret; + +} + +int +pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + int ret = -EINVAL; + + pthread_mutex_lock (&pl_inode->mutex); + { + + ret = __lock_reservelk (this, pl_inode, lock, can_block); + if (ret < 0) + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->owner, + lock->user_flock.l_start, + lock->user_flock.l_len); + else + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, + lock->owner, + lock->fl_start, + lock->fl_end); + + } + pthread_mutex_unlock (&pl_inode->mutex); + return ret; +} diff --git a/xlators/protocol/client/src/Makefile.am b/xlators/protocol/client/src/Makefile.am index 007810e9901..f2dea68d70d 100644 --- a/xlators/protocol/client/src/Makefile.am +++ b/xlators/protocol/client/src/Makefile.am @@ -9,7 +9,7 @@ client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la client_la_SOURCES = client.c client-helpers.c client3_1-fops.c \ - client-handshake.c client-callback.c + client-handshake.c client-callback.c client-lk.c noinst_HEADERS = client.h client-mem-types.h AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 7511813d3cb..8b0c90ebc8c 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -343,14 +343,38 @@ unwind: } int +client_notify_parents_child_up (xlator_t *this) +{ + xlator_list_t *parent = NULL; + + /* As fuse is not 'parent' of any translator now, triggering its + CHILD_UP event is hacky in case client has only client protocol */ + if (!this->parents && this->ctx && this->ctx->master) { + /* send notify to 'ctx->master' if it exists */ + xlator_notify (this->ctx->master, GF_EVENT_CHILD_UP, + this->graph); + } + + parent = this->parents; + while (parent) { + xlator_notify (parent->xlator, GF_EVENT_CHILD_UP, + this); + parent = parent->next; + } + + return 0; +} + +int client_post_handshake (call_frame_t *frame, xlator_t *this) { clnt_conf_t *conf = NULL; clnt_fd_ctx_t *tmp = NULL; clnt_fd_ctx_t *fdctx = NULL; - xlator_list_t *parent = NULL; struct list_head reopen_head; + int count = 0; + if (!this || !this->private) goto out; @@ -366,34 +390,33 @@ client_post_handshake (call_frame_t *frame, xlator_t *this) list_del_init 
(&fdctx->sfd_pos); list_add_tail (&fdctx->sfd_pos, &reopen_head); + count++; } } pthread_mutex_unlock (&conf->lock); - list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { - list_del_init (&fdctx->sfd_pos); + /* Delay notifying CHILD_UP to parents + until all locks are recovered */ + if (count > 0) { + gf_log (this->name, GF_LOG_TRACE, + "%d fds open - Delaying child_up until they are re-opened", + count); + client_save_number_fds (conf, count); - if (fdctx->is_dir) - protocol_client_reopendir (this, fdctx); - else - protocol_client_reopen (this, fdctx); - } + list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { + list_del_init (&fdctx->sfd_pos); - /* As fuse is not 'parent' of any translator now, triggering its - CHILD_UP event is hacky in case client has only client protocol */ - if (!this->parents && this->ctx && this->ctx->master) { - /* send notify to 'ctx->master' if it exists */ - xlator_notify (this->ctx->master, GF_EVENT_CHILD_UP, - this->graph); - } + if (fdctx->is_dir) + protocol_client_reopendir (this, fdctx); + else + protocol_client_reopen (this, fdctx); + } + } else { + gf_log (this->name, GF_LOG_TRACE, + "No open fds - notifying all parents child up"); + client_notify_parents_child_up (this); - parent = this->parents; - while (parent) { - xlator_notify (parent->xlator, GF_EVENT_CHILD_UP, - this); - parent = parent->next; } - out: return 0; } diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index d8da60aa1ae..d1282d50c90 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -76,16 +76,26 @@ this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, clnt_fd_ctx_t *ctx) ret = fd_ctx_get (file, this, &oldaddr); if (ret >= 0) { - gf_log (this->name, GF_LOG_DEBUG, - "%s (%"PRId64"): trying duplicate remote fd set. 
", - loc->path, loc->inode->ino); + if (loc) + gf_log (this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): trying duplicate remote fd set. ", + loc->path, loc->inode->ino); + else + gf_log (this->name, GF_LOG_DEBUG, + "%p: trying duplicate remote fd set. ", + file); } ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "%s (%"PRId64"): failed to set remote fd", - loc->path, loc->inode->ino); + if (loc) + gf_log (this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): failed to set remote fd", + loc->path, loc->inode->ino); + else + gf_log (this->name, GF_LOG_DEBUG, + "%p: failed to set remote fd", + file); } out: return; diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c new file mode 100644 index 00000000000..4ad7fc2b1ec --- /dev/null +++ b/xlators/protocol/client/src/client-lk.c @@ -0,0 +1,949 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include "common-utils.h" +#include "xlator.h" +#include "client.h" + +static void +__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock); + +static int +client_send_recovery_lock (call_frame_t *frame, xlator_t *this, + client_posix_lock_t *lock); +static void +__dump_client_lock (client_posix_lock_t *lock) +{ + xlator_t *this = NULL; + + this = THIS; + + gf_log (this->name, GF_LOG_TRACE, + "{fd=%lld}" + "{%s lk-owner:%"PRIu64" %"PRId64" - %"PRId64"}" + "{start=%"PRId64" end=%"PRId64"}", + (unsigned long long)lock->fd, + lock->fl_type == F_WRLCK ? "Write-Lock" : "Read-Lock", + lock->owner, + lock->user_flock.l_start, + lock->user_flock.l_len, + lock->fl_start, + lock->fl_end); +} + +static int +dump_client_locks_fd (clnt_fd_ctx_t *fdctx) +{ + client_posix_lock_t *lock = NULL; + int count = 0; + + pthread_mutex_lock (&fdctx->mutex); + { + list_for_each_entry (lock, &fdctx->lock_list, list) { + __dump_client_lock (lock); + count++; + } + } + pthread_mutex_unlock (&fdctx->mutex); + + return count; + +} + +int +dump_client_locks (inode_t *inode) +{ + fd_t *fd = NULL; + clnt_conf_t *conf = NULL; + xlator_t *this = NULL; + clnt_fd_ctx_t *fdctx = NULL; + + int total_count = 0; + int locks_fd_count = 0; + + this = THIS; + conf = this->private; + + LOCK (&inode->lock); + { + list_for_each_entry (fd, &inode->fd_list, inode_list) { + locks_fd_count = 0; + + pthread_mutex_lock (&conf->lock); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->lock); + + if (fdctx) + locks_fd_count = dump_client_locks_fd (fdctx); + + total_count += locks_fd_count; + } + + } + UNLOCK (&inode->lock); + + return total_count; + +} + +static off_t +__get_lock_length (off_t start, off_t end) +{ + if (end == LLONG_MAX) + return 0; + else + return (end - start + 1); +} + +/* Add two locks */ +static client_posix_lock_t * +add_locks (client_posix_lock_t *l1, client_posix_lock_t *l2) +{ + client_posix_lock_t *sum = NULL; + + sum = GF_CALLOC (1, sizeof 
(*sum), gf_client_mt_clnt_lock_t); + if (!sum) + return NULL; + + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); + + sum->user_flock.l_start = sum->fl_start; + sum->user_flock.l_len = __get_lock_length (sum->fl_start, + sum->fl_end); + + return sum; +} + +/* Return true if the locks have the same owner */ +static int +same_owner (client_posix_lock_t *l1, client_posix_lock_t *l2) +{ + return ((l1->owner == l2->owner)); +} + +/* Return true if the locks overlap, false otherwise */ +static int +locks_overlap (client_posix_lock_t *l1, client_posix_lock_t *l2) +{ + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ + + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); +} + +static void +__delete_client_lock (client_posix_lock_t *lock) +{ + list_del_init (&lock->list); +} + +/* Destroy a posix_lock */ +static void +__destroy_client_lock (client_posix_lock_t *lock) +{ + free (lock); +} + +/* Subtract two locks */ +struct _values { + client_posix_lock_t *locks[3]; +}; + +/* {big} must always be contained inside {small} */ +static struct _values +subtract_locks (client_posix_lock_t *big, client_posix_lock_t *small) +{ + struct _values v = { .locks = {0, 0, 0} }; + + if ((big->fl_start == small->fl_start) && + (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t ); + GF_ASSERT (v.locks[0]); + memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); + v.locks[0]->fl_type = small->fl_type; + } + else if ((small->fl_start > big->fl_start) && + (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[0]); + v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[1]); + v.locks[2] = 
GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[2]); + + memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start, + v.locks[0]->fl_end); + + memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); + memcpy (v.locks[2], big, sizeof (client_posix_lock_t)); + v.locks[2]->fl_start = small->fl_end + 1; + v.locks[2]->user_flock.l_start = small->fl_end + 1; + } + /* one edge coincides with big */ + else if (small->fl_start == big->fl_start) { + v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[0]); + v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); + v.locks[0]->fl_start = small->fl_end + 1; + v.locks[0]->user_flock.l_start = small->fl_end + 1; + + memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); + } + else if (small->fl_end == big->fl_end) { + v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[0]); + v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), + gf_client_mt_clnt_lock_t); + GF_ASSERT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start, + v.locks[0]->fl_end); + + memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); + } + else { + gf_log ("client-protocol", GF_LOG_ERROR, + "Unexpected case in subtract_locks. 
Please send " + "a bug report to gluster-devel@nongnu.org"); + } + + return v; +} + +static void +__delete_unlck_locks (clnt_fd_ctx_t *fdctx) +{ + client_posix_lock_t *l = NULL; + client_posix_lock_t *tmp = NULL; + + list_for_each_entry_safe (l, tmp, &fdctx->lock_list, list) { + if (l->fl_type == F_UNLCK) { + __delete_client_lock (l); + __destroy_client_lock (l); + } + } +} + +static void +__insert_lock (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ + list_add_tail (&lock->list, &fdctx->lock_list); + + return; +} + +static void +__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ + client_posix_lock_t *conf = NULL; + client_posix_lock_t *t = NULL; + client_posix_lock_t *sum = NULL; + int i = 0; + struct _values v = { .locks = {0, 0, 0} }; + + list_for_each_entry_safe (conf, t, &fdctx->lock_list, list) { + if (!locks_overlap (conf, lock)) + continue; + + if (same_owner (conf, lock)) { + if (conf->fl_type == lock->fl_type) { + sum = add_locks (lock, conf); + + sum->fd = lock->fd; + + __delete_client_lock (conf); + __destroy_client_lock (conf); + + __destroy_client_lock (lock); + __insert_and_merge (fdctx, sum); + + return; + } else { + sum = add_locks (lock, conf); + + sum->fd = conf->fd; + sum->owner = conf->owner; + + v = subtract_locks (sum, lock); + + __delete_client_lock (conf); + __destroy_client_lock (conf); + + __delete_client_lock (lock); + __destroy_client_lock (lock); + + __destroy_client_lock (sum); + + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; + + INIT_LIST_HEAD (&v.locks[i]->list); + __insert_and_merge (fdctx, + v.locks[i]); + } + + __delete_unlck_locks (fdctx); + return; + } + } + + if (lock->fl_type == F_UNLCK) { + continue; + } + + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + __insert_lock (fdctx, lock); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + __insert_lock (fdctx, lock); + } else { + __destroy_client_lock (lock); + } +} + +static void 
+client_setlk (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ + pthread_mutex_lock (&fdctx->mutex); + { + __insert_and_merge (fdctx, lock); + } + pthread_mutex_unlock (&fdctx->mutex); + + return; +} + +static void +destroy_client_lock (client_posix_lock_t *lock) +{ + GF_FREE (lock); +} + +int32_t +delete_granted_locks_owner (fd_t *fd, uint64_t owner) +{ + clnt_fd_ctx_t *fdctx = NULL; + client_posix_lock_t *lock = NULL; + client_posix_lock_t *tmp = NULL; + xlator_t *this = NULL; + + struct list_head delete_list; + int ret = 0; + int count = 0; + + INIT_LIST_HEAD (&delete_list); + this = THIS; + fdctx = this_fd_get_ctx (fd, this); + if (!fdctx) { + gf_log (this->name, GF_LOG_DEBUG, + "fdctx not valid"); + ret = -1; + goto out; + } + + pthread_mutex_lock (&fdctx->mutex); + { + list_for_each_entry_safe (lock, tmp, &fdctx->lock_list, list) { + if (lock->owner == owner) { + list_del_init (&lock->list); + list_add_tail (&lock->list, &delete_list); + count++; + } + } + } + pthread_mutex_unlock (&fdctx->mutex); + + list_for_each_entry_safe (lock, tmp, &delete_list, list) { + list_del_init (&lock->list); + destroy_client_lock (lock); + } + +/* FIXME: Need to actually print the locks instead of count */ + gf_log (this->name, GF_LOG_DEBUG, + "Number of locks cleared=%d", count); + +out: + return ret; +} + +int32_t +delete_granted_locks_fd (clnt_fd_ctx_t *fdctx) +{ + client_posix_lock_t *lock = NULL; + client_posix_lock_t *tmp = NULL; + xlator_t *this = NULL; + + struct list_head delete_list; + int ret = 0; + int count = 0; + + INIT_LIST_HEAD (&delete_list); + this = THIS; + + pthread_mutex_lock (&fdctx->mutex); + { + list_splice_init (&fdctx->lock_list, &delete_list); + } + pthread_mutex_unlock (&fdctx->mutex); + + list_for_each_entry_safe (lock, tmp, &delete_list, list) { + list_del_init (&lock->list); + count++; + destroy_client_lock (lock); + } + + /* FIXME: Need to actually print the locks instead of count */ + gf_log (this->name, GF_LOG_DEBUG, + "Number of locks 
cleared=%d", count); + + return ret; +} + +static void +client_mark_bad_fd (fd_t *fd, clnt_fd_ctx_t *fdctx) +{ + xlator_t *this = NULL; + + this = THIS; + if (fdctx) + fdctx->remote_fd = -1; + this_fd_set_ctx (fd, this, NULL, fdctx); +} + +int32_t +client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd) +{ + int ret = 0; + + if (cmd == F_GETLK || cmd == F_GETLK64) + *gf_cmd = GF_LK_GETLK; + else if (cmd == F_SETLK || cmd == F_SETLK64) + *gf_cmd = GF_LK_SETLK; + else if (cmd == F_SETLKW || cmd == F_SETLKW64) + *gf_cmd = GF_LK_SETLKW; + else if (cmd == F_RESLK_LCK) + *gf_cmd = GF_LK_RESLK_LCK; + else if (cmd == F_RESLK_LCKW) + *gf_cmd = GF_LK_RESLK_LCKW; + else if (cmd == F_RESLK_UNLCK) + *gf_cmd = GF_LK_RESLK_UNLCK; + else + ret = -1; + + return ret; + +} + +static client_posix_lock_t * +new_client_lock (struct flock *flock, uint64_t owner, + int32_t cmd, fd_t *fd) +{ + client_posix_lock_t *new_lock = NULL; + xlator_t *this = NULL; + + + this = THIS; + new_lock = GF_CALLOC (1, sizeof (*new_lock), + gf_client_mt_clnt_lock_t); + if (!new_lock) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + goto out; + } + + INIT_LIST_HEAD (&new_lock->list); + new_lock->fd = fd; + memcpy (&new_lock->user_flock, flock, sizeof (struct flock)); + + new_lock->fl_type = flock->l_type; + new_lock->fl_start = flock->l_start; + + if (flock->l_len == 0) + new_lock->fl_end = LLONG_MAX; + else + new_lock->fl_end = flock->l_start + flock->l_len - 1; + + new_lock->owner = owner; + new_lock->cmd = cmd; /* Not really useful */ + + +out: + return new_lock; +} + +void +client_save_number_fds (clnt_conf_t *conf, int count) +{ + LOCK (&conf->rec_lock); + { + conf->reopen_fd_count = count; + } + UNLOCK (&conf->rec_lock); +} + +int +client_add_lock_for_recovery (fd_t *fd, struct flock *flock, uint64_t owner, + int32_t cmd) +{ + clnt_fd_ctx_t *fdctx = NULL; + xlator_t *this = NULL; + client_posix_lock_t *lock = NULL; + clnt_conf_t *conf = NULL; + + int ret = 0; + + this = THIS; + conf = 
this->private; + + pthread_mutex_lock (&conf->lock); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->lock); + + if (!fdctx) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get fd context. Marking as bad fd."); + ret = -EBADFD; + goto out; + } + + lock = new_client_lock (flock, owner, cmd, fd); + if (!lock) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -ENOMEM; + goto out; + } + + client_setlk (fdctx, lock); + +out: + return ret; + +} + +static int +construct_reserve_unlock (struct flock *lock, call_frame_t *frame, + client_posix_lock_t *client_lock) +{ + GF_ASSERT (lock); + GF_ASSERT (frame); + GF_ASSERT (frame->root->lk_owner); + + lock->l_type = F_UNLCK; + lock->l_start = 0; + lock->l_whence = SEEK_SET; + lock->l_len = 0; /* Whole file */ + lock->l_pid = (uint64_t)frame->root; + + frame->root->lk_owner = client_lock->owner; + + return 0; +} + +static int +construct_reserve_lock (client_posix_lock_t *client_lock, call_frame_t *frame, + struct flock *lock) +{ + GF_ASSERT (client_lock); + + memcpy (lock, &(client_lock->user_flock), sizeof (struct flock)); + + frame->root->lk_owner = client_lock->owner; + + return 0; +} + +uint64_t +decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf) +{ + uint64_t fd_count = 0; + + LOCK (&conf->rec_lock); + { + fd_count = --(conf->reopen_fd_count); + } + UNLOCK (&conf->rec_lock); + + if (fd_count == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "All locks healed on the last fd - notifying CHILDUP"); + client_notify_parents_child_up (this); + } + + return fd_count; +} + +int32_t +client_remove_reserve_lock_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; + + uint64_t fd_count = 0; + + local = frame->local; + conf = this->private; + + if (op_ret < 0) { + /* TODO: critical error describing recovery command + and blanket on ops on fd */ + gf_log 
(this->name, GF_LOG_CRITICAL, + "Lock recovery failed with error msg=%s", + strerror(op_errno)); + goto cleanup; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Removing reserve lock was successful."); + +cleanup: + frame->local = NULL; + client_mark_bad_fd (local->client_lock->fd, + local->fdctx); + destroy_client_lock (local->client_lock); + client_local_wipe (local); + STACK_DESTROY (frame->root); + fd_count = decrement_reopen_fd_count (this, conf); + gf_log (this->name, GF_LOG_DEBUG, + "Need to attempt lock recovery on %lld open fds", + (unsigned long long) fd_count); + return 0; +} + +static void +client_remove_reserve_lock (xlator_t *this, call_frame_t *frame, + client_posix_lock_t *lock) +{ + struct flock unlock; + clnt_local_t *local = NULL; + + local = frame->local; + construct_reserve_unlock (&unlock, frame, lock); + + STACK_WIND (frame, client_remove_reserve_lock_cbk, + this, this->fops->lk, + lock->fd, F_RESLK_UNLCK, &unlock); +} + +static client_posix_lock_t * +get_next_recovery_lock (xlator_t *this, clnt_local_t *local) +{ + client_posix_lock_t *lock = NULL; + + pthread_mutex_lock (&local->mutex); + { + if (list_empty (&local->lock_list)) { + gf_log (this->name, GF_LOG_DEBUG, + "lock-list empty"); + goto unlock; + } + + lock = list_entry ((local->lock_list).next, typeof (*lock), list); + list_del_init (&lock->list); + } +unlock: + pthread_mutex_unlock (&local->mutex); + + return lock; + +} + +int32_t +client_reserve_lock_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; + + uint64_t fd_count = 0; + + local = frame->local; + conf = this->private; + + /* Got the reserve lock. 
Check if lock is grantable and proceed + with the real lk call */ + + if (op_ret >= 0) { + /* Lock is grantable if flock reflects a successful getlk() call*/ + if (lock->l_type == F_UNLCK && lock->l_pid) { + gf_log (this->name, GF_LOG_DEBUG, + "Got the reservelk, but the lock is not grantable. "); + client_remove_reserve_lock (this, frame, local->client_lock); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Reserve Lock succeeded"); + client_send_recovery_lock (frame, this, local->client_lock); + goto out; + } + + /* Somebody else has a reserve lk. Lock conflict detected. + Mark fd as bad */ + + gf_log (this->name, GF_LOG_DEBUG, + "Reservelk OP failed. Aborting lock recovery and marking bad fd"); + + client_mark_bad_fd (local->client_lock->fd, + local->fdctx); + destroy_client_lock (local->client_lock); + frame->local = NULL; + client_local_wipe (local); + STACK_DESTROY (frame->root); + fd_count = decrement_reopen_fd_count (this, conf); + gf_log (this->name, GF_LOG_DEBUG, + "Need to attempt lock recovery on %lld open fds", + (unsigned long long) fd_count); + +out: + return 0; +} + +int32_t +client_recovery_lock_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + clnt_local_t *local = NULL; + clnt_fd_ctx_t *fdctx = NULL; + clnt_conf_t *conf = NULL; + client_posix_lock_t *next_lock = NULL; + + struct flock reserve_flock; + uint64_t fd_count = 0; + + local = frame->local; + conf = this->private; + + if (op_ret < 0) { + /* TODO: critical error describing recovery command + and blanket on ops on fd */ + gf_log (this->name, GF_LOG_CRITICAL, + "Lock recovery failed with error msg=%s", + strerror(op_errno)); + + client_mark_bad_fd (local->client_lock->fd, + local->fdctx); + goto cleanup; + + /* Lock recovered. 
Continue with reserve lock for next lock */ + } else { + gf_log (this->name, GF_LOG_DEBUG, + "lock recovered successfully - Continuing with next lock."); + + next_lock = get_next_recovery_lock (this, local); + if (!next_lock) { + gf_log (this->name, GF_LOG_DEBUG, + "All locks recovered on fd"); + goto cleanup; + } + + construct_reserve_lock (next_lock, frame, &reserve_flock); + local->fdctx = fdctx; + local->client_lock = next_lock; + + STACK_WIND (frame, client_reserve_lock_cbk, + this, this->fops->lk, + next_lock->fd, F_RESLK_LCK, &reserve_flock); + goto out; + + } + +cleanup: + frame->local = NULL; + client_local_wipe (local); + if (local->client_lock) + destroy_client_lock (local->client_lock); + STACK_DESTROY (frame->root); + fd_count = decrement_reopen_fd_count (this, conf); + gf_log (this->name, GF_LOG_DEBUG, + "Need to attempt lock recovery on %lld open fds", + (unsigned long long) fd_count); + +out: + return 0; +} + +static int +client_send_recovery_lock (call_frame_t *frame, xlator_t *this, + client_posix_lock_t *lock) +{ + + frame->root->lk_owner = lock->owner; + + /* Send all locks as F_SETLK to prevent the frame + from blocking if there is a conflict */ + + STACK_WIND (frame, client_recovery_lock_cbk, + this, this->fops->lk, + lock->fd, F_SETLK, + &(lock->user_flock)); + + return 0; +} + +static int +client_lockrec_init (clnt_fd_ctx_t *fdctx, clnt_local_t *local) +{ + + INIT_LIST_HEAD (&local->lock_list); + pthread_mutex_init (&local->mutex, NULL); + + pthread_mutex_lock (&fdctx->mutex); + { + list_splice_init (&fdctx->lock_list, &local->lock_list); + } + pthread_mutex_unlock (&fdctx->mutex); + + return 0; +} + +int +client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ + call_frame_t *frame = NULL; + clnt_local_t *local = NULL; + client_posix_lock_t *lock = NULL; + + struct flock reserve_flock; + int ret = 0; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + 
"Out of memory"); + ret = -ENOMEM; + goto out; + } + + client_lockrec_init (fdctx, local); + + lock = get_next_recovery_lock (this, local); + if (!lock) { + gf_log (this->name, GF_LOG_DEBUG, + "No locks on fd"); + ret = -1; + goto out; + } + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -1; + goto out; + } + + construct_reserve_lock (lock, frame, &reserve_flock); + + local->fdctx = fdctx; + local->client_lock = lock; + + STACK_WIND (frame, client_reserve_lock_cbk, + this, this->fops->lk, + lock->fd, F_RESLK_LCK, &reserve_flock); + +out: + return ret; + + +} + +int32_t +client_dump_locks (char *name, inode_t *inode, + dict_t *dict) +{ + int ret = 0; + char dict_string[256]; + + ret = dump_client_locks (inode); + snprintf (dict_string, 256, "%d locks dumped in log file", ret); + + dict = dict_new (); + if (!dict) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Out of memory"); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "trusted.glusterfs.clientlk-dump", dict_string); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Could not set dict with %s", CLIENT_DUMP_LOCKS); + goto out; + } + +out: + return ret; +} + +int32_t +is_client_dump_locks_cmd (char *name) +{ + int ret = 0; + + if (strcmp (name, CLIENT_DUMP_LOCKS) == 0) + ret = 1; + + return ret; +} diff --git a/xlators/protocol/client/src/client-mem-types.h b/xlators/protocol/client/src/client-mem-types.h index 638e537d116..50015b18fd1 100644 --- a/xlators/protocol/client/src/client-mem-types.h +++ b/xlators/protocol/client/src/client-mem-types.h @@ -28,6 +28,7 @@ enum gf_client_mem_types_ { gf_client_mt_clnt_local_t, gf_client_mt_clnt_req_buf_t, gf_client_mt_clnt_fdctx_t, + gf_client_mt_clnt_lock_t, gf_client_mt_end, }; #endif /* __CLIENT_MEM_TYPES_H__ */ diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 726a9dad792..b703b88f4b7 100644 --- a/xlators/protocol/client/src/client.c +++ 
b/xlators/protocol/client/src/client.c @@ -1865,6 +1865,8 @@ init (xlator_t *this) pthread_mutex_init (&conf->lock, NULL); INIT_LIST_HEAD (&conf->saved_fds); + LOCK_INIT (&conf->rec_lock); + this->private = conf; /* If it returns -1, then its a failure, if it returns +1 we need diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 84940de5f84..a31873a027b 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -33,7 +33,7 @@ /* FIXME: Needs to be defined in a common file */ #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect" #define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect" - +#define CLIENT_DUMP_LOCKS "trusted.glusterfs.clientlk-dump" struct clnt_options { char *remote_subvolume; int ping_timeout; @@ -54,6 +54,10 @@ typedef struct clnt_conf { rpc_clnt_prog_t *mgmt; rpc_clnt_prog_t *handshake; rpc_clnt_prog_t *dump; + + uint64_t reopen_fd_count; /* Count of fds reopened after a + connection is established */ + gf_lock_t rec_lock; } clnt_conf_t; typedef struct _client_fd_ctx { @@ -68,8 +72,24 @@ typedef struct _client_fd_ctx { char released; int32_t flags; int32_t wbflags; + + pthread_mutex_t mutex; + struct list_head lock_list; /* List of all granted locks on this fd */ } clnt_fd_ctx_t; +typedef struct _client_posix_lock { + fd_t *fd; /* The fd on which the lk operation was made */ + + struct flock user_flock; /* the flock supplied by the user */ + off_t fl_start; + off_t fl_end; + short fl_type; + int32_t cmd; /* the cmd for the lock call */ + uint64_t owner; /* lock owner from fuse */ + + struct list_head list; /* reference used to add to the fdctx list of locks */ +} client_posix_lock_t; + typedef struct client_local { loc_t loc; loc_t loc2; @@ -79,6 +99,12 @@ typedef struct client_local { uint32_t wbflags; struct iobref *iobref; fop_cbk_fn_t op; + + client_posix_lock_t *client_lock; + uint64_t owner; + int32_t cmd; + struct list_head lock_list; + 
pthread_mutex_t mutex; } clnt_local_t; typedef struct client_args { @@ -138,6 +164,17 @@ int unserialize_rsp_direntp (struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries int clnt_readdir_rsp_cleanup (gfs3_readdir_rsp *rsp); int clnt_readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp); - - +int client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx); +int32_t delete_granted_locks_owner (fd_t *fd, uint64_t owner); +int client_add_lock_for_recovery (fd_t *fd, struct flock *flock, uint64_t owner, + int32_t cmd); +uint64_t decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf); +int32_t delete_granted_locks_fd (clnt_fd_ctx_t *fdctx); +int32_t client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd); +void client_save_number_fds (clnt_conf_t *conf, int count); +int dump_client_locks (inode_t *inode); +int client_notify_parents_child_up (xlator_t *this); +int32_t is_client_dump_locks_cmd (char *name); +int32_t client_dump_locks (char *name, inode_t *inode, + dict_t *dict); #endif /* !_CLIENT_H */ diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c index 69f2646978d..c3add8fd3ba 100644 --- a/xlators/protocol/client/src/client3_1-fops.c +++ b/xlators/protocol/client/src/client3_1-fops.c @@ -313,6 +313,7 @@ client3_1_open_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->wbflags = local->wbflags; INIT_LIST_HEAD (&fdctx->sfd_pos); + INIT_LIST_HEAD (&fdctx->lock_list); this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -605,10 +606,14 @@ client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { call_frame_t *frame = NULL; + clnt_local_t *local = NULL; + xlator_t *this = NULL; gf_common_rsp rsp = {0,}; int ret = 0; frame = myframe; + this = THIS; + local = frame->local; if (-1 == req->rpc_status) { rsp.op_ret = -1; @@ -623,6 +628,18 @@ client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count, goto out; } + if (rsp.op_ret >= 0) { + /* Delete all saved 
locks of the owner issuing flush */ + gf_log (this->name, GF_LOG_DEBUG, + "Attempting to delete locks of owner=%llu", + (long long unsigned) local->owner); + delete_granted_locks_owner (local->fd, local->owner); + } + + frame->local = NULL; + if (local) + client_local_wipe (local); + out: STACK_UNWIND_STRICT (flush, frame, rsp.op_ret, gf_error_to_errno (rsp.op_errno)); @@ -1442,6 +1459,7 @@ client3_1_create_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->flags = local->flags; INIT_LIST_HEAD (&fdctx->sfd_pos); + INIT_LIST_HEAD (&fdctx->lock_list); this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -1506,12 +1524,14 @@ int client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { - call_frame_t *frame = NULL; - struct flock lock = {0,}; - gfs3_lk_rsp rsp = {0,}; - int ret = 0; + call_frame_t *frame = NULL; + clnt_local_t *local = NULL; + struct flock lock = {0,}; + gfs3_lk_rsp rsp = {0,}; + int ret = 0; frame = myframe; + local = frame->local; if (-1 == req->rpc_status) { rsp.op_ret = -1; @@ -1531,6 +1551,20 @@ client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count, gf_flock_to_flock (&rsp.flock, &lock); } + /* Save the lock to the client lock cache to be able + to recover in the case of server reboot.*/ + if (local->cmd == F_SETLK || local->cmd == F_SETLKW) { + ret = client_add_lock_for_recovery (local->fd, &lock, + local->owner, local->cmd); + if (ret < 0) { + rsp.op_ret = -1; + rsp.op_errno = -ret; + } + } + + frame->local = NULL; + client_local_wipe (local); + out: STACK_UNWIND_STRICT (lk, frame, rsp.op_ret, gf_error_to_errno (rsp.op_errno), &lock); @@ -1777,6 +1811,7 @@ client3_1_opendir_cbk (struct rpc_req *req, struct iovec *iov, int count, fdctx->is_dir = 1; INIT_LIST_HEAD (&fdctx->sfd_pos); + INIT_LIST_HEAD (&fdctx->lock_list); this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -2014,12 +2049,14 @@ int client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) 
{ - int32_t ret = -1; - gfs3_open_rsp rsp = {0,}; - clnt_local_t *local = NULL; - clnt_conf_t *conf = NULL; - clnt_fd_ctx_t *fdctx = NULL; - call_frame_t *frame = NULL; + int32_t ret = -1; + gfs3_open_rsp rsp = {0,}; + int attempt_lock_recovery = _gf_false; + uint64_t fd_count = 0; + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; + clnt_fd_ctx_t *fdctx = NULL; + call_frame_t *frame = NULL; frame = myframe; local = frame->local; @@ -2052,6 +2089,7 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, if (!fdctx->released) { list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + attempt_lock_recovery = _gf_true; fdctx = NULL; } } @@ -2060,6 +2098,20 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, } } + if (attempt_lock_recovery) { + ret = client_attempt_lock_recovery (frame->this, local->fdctx); + if (ret < 0) + gf_log (frame->this->name, GF_LOG_DEBUG, + "No locks on fd to recover"); + else { + fd_count = decrement_reopen_fd_count (frame->this, conf); + gf_log (frame->this->name, GF_LOG_DEBUG, + "Need to attempt lock recovery on %lld open fds", + (unsigned long long) fd_count); + + } + } + out: if (fdctx) client_fdctx_destroy (frame->this, fdctx); @@ -2380,6 +2432,9 @@ client3_1_release (call_frame_t *frame, xlator_t *this, if (remote_fd != -1) { req.fd = remote_fd; req.gfs_id = GFS3_OP_RELEASE; + + delete_granted_locks_fd (fdctx); + ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_RELEASE, client3_1_release_cbk, NULL, @@ -3468,7 +3523,6 @@ unwind: } - int32_t client3_1_flush (call_frame_t *frame, xlator_t *this, void *data) @@ -3477,8 +3531,9 @@ client3_1_flush (call_frame_t *frame, xlator_t *this, gfs3_flush_req req = {0,}; clnt_fd_ctx_t *fdctx = NULL; clnt_conf_t *conf = NULL; + clnt_local_t *local = NULL; int op_errno = ESTALE; - int ret = 0; + int ret = 0; if (!frame || !this || !data) goto unwind; @@ -3507,6 +3562,21 @@ client3_1_flush (call_frame_t *frame, xlator_t *this, goto unwind; 
} + conf = this->private; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "Out of Memory"); + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + + } + + local->fd = fd_ref (args->fd); + local->owner = frame->root->lk_owner; + frame->local = local; + req.fd = fdctx->remote_fd; req.gfs_id = GFS3_OP_FLUSH; @@ -4012,16 +4082,23 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this, clnt_conf_t *conf = NULL; clnt_args_t *args = NULL; gfs3_getxattr_req req = {0,}; + dict_t *dict = NULL; int ret = 0; + int32_t op_ret = 0; int op_errno = ESTALE; - if (!frame || !this || !data) + if (!frame || !this || !data) { + op_ret = -1; + op_errno = 0; goto unwind; - + } args = data; - if (!(args->loc && args->loc->inode)) + if (!(args->loc && args->loc->inode)) { + op_ret = -1; + op_errno = EINVAL; goto unwind; + } memcpy (req.gfid, args->loc->inode->gfid, 16); req.namelen = 1; /* Use it as a flag */ @@ -4035,19 +4112,42 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this, conf = this->private; + if (args && args->name) { + if (is_client_dump_locks_cmd ((char *)args->name)) { + ret = client_dump_locks ((char *)args->name, + args->loc->inode, + dict); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Client dump locks failed"); + op_ret = -1; + op_errno = EINVAL; + } + + GF_ASSERT (dict); + op_ret = 0; + op_errno = 0; + goto unwind; + } + } + ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_GETXATTR, client3_1_getxattr_cbk, NULL, xdr_from_getxattr_req, NULL, 0, NULL, 0, NULL); if (ret) { + op_ret = -1; op_errno = ENOTCONN; goto unwind; } return 0; unwind: - STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL); + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, NULL); + if (dict) + dict_unref (dict); + return 0; } @@ -4242,20 +4342,28 @@ int32_t client3_1_lk (call_frame_t *frame, xlator_t *this, void *data) { - clnt_args_t *args = NULL; - gfs3_lk_req req = {0,}; - 
int32_t gf_cmd = 0; - int32_t gf_type = 0; - clnt_fd_ctx_t *fdctx = NULL; - clnt_conf_t *conf = NULL; - int op_errno = ESTALE; - int ret = 0; + clnt_args_t *args = NULL; + gfs3_lk_req req = {0,}; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + clnt_fd_ctx_t *fdctx = NULL; + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; + int op_errno = ESTALE; + int ret = 0; if (!frame || !this || !data) goto unwind; args = data; conf = this->private; + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + op_errno = ENOMEM; + goto unwind; + } pthread_mutex_lock (&conf->lock); { @@ -4278,16 +4386,11 @@ client3_1_lk (call_frame_t *frame, xlator_t *this, goto unwind; } - if (args->cmd == F_GETLK || args->cmd == F_GETLK64) - gf_cmd = GF_LK_GETLK; - else if (args->cmd == F_SETLK || args->cmd == F_SETLK64) - gf_cmd = GF_LK_SETLK; - else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64) - gf_cmd = GF_LK_SETLKW; - else { + ret = client_cmd_to_gf_cmd (args->cmd, &gf_cmd); + if (ret) { + op_errno = EINVAL; gf_log (this->name, GF_LOG_DEBUG, "Unknown cmd (%d)!", gf_cmd); - goto unwind; } switch (args->flock->l_type) { @@ -4302,6 +4405,11 @@ client3_1_lk (call_frame_t *frame, xlator_t *this, break; } + local->owner = frame->root->lk_owner; + local->cmd = args->cmd; + local->fd = fd_ref (args->fd); + frame->local = local; + req.fd = fdctx->remote_fd; req.cmd = gf_cmd; req.type = gf_type; diff --git a/xlators/protocol/legacy/client/src/client-protocol.c b/xlators/protocol/legacy/client/src/client-protocol.c index 57cfbc73e33..ebb4e6fcf96 100644 --- a/xlators/protocol/legacy/client/src/client-protocol.c +++ b/xlators/protocol/legacy/client/src/client-protocol.c @@ -145,16 +145,26 @@ this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, client_fd_ctx_t *ctx) ret = fd_ctx_get (file, this, &oldaddr); if (ret >= 0) { - gf_log (this->name, GF_LOG_DEBUG, - "%s (%"PRId64"): trying duplicate remote fd 
set. ", - loc->path, loc->inode->ino); + if (loc) + gf_log (this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): trying duplicate remote fd set. ", + loc->path, loc->inode->ino); + else + gf_log (this->name, GF_LOG_DEBUG, + "%p: trying duplicate remote fd set. ", + file); } ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "%s (%"PRId64"): failed to set remote fd", - loc->path, loc->inode->ino); + if (loc) + gf_log (this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): failed to set remote fd", + loc->path, loc->inode->ino); + else + gf_log (this->name, GF_LOG_DEBUG, + "%p: failed to set remote fd", + file); } out: return; diff --git a/xlators/protocol/server/src/server3_1-fops.c b/xlators/protocol/server/src/server3_1-fops.c index a0276ad6bb4..9c0ac90cdf9 100644 --- a/xlators/protocol/server/src/server3_1-fops.c +++ b/xlators/protocol/server/src/server3_1-fops.c @@ -4851,6 +4851,15 @@ server_lk (rpcsvc_request_t *req) case GF_LK_SETLKW: state->cmd = F_SETLKW; break; + case GF_LK_RESLK_LCK: + state->cmd = F_RESLK_LCK; + break; + case GF_LK_RESLK_LCKW: + state->cmd = F_RESLK_LCKW; + break; + case GF_LK_RESLK_UNLCK: + state->cmd = F_RESLK_UNLCK; + break; } gf_flock_to_flock (&args.flock, &state->flock); |