summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavan Sondur <pavan@gluster.com>2010-09-30 02:25:31 +0000
committerVijay Bellur <vijay@dev.gluster.com>2010-09-30 11:19:24 -0700
commitaf18c636c44b1ea56296850e55afe0e4b2ce845c (patch)
tree40f8470ec000b96d61b3f8d53286aa0812c9d921
parent760daf28898cbb8b5072551735bebee16450ba08 (diff)
protocol/client: cluster/afr: Support lock recovery and self heal.
Signed-off-by: Pavan Vilas Sondur <pavan@gluster.com> Signed-off-by: Vijay Bellur <vijay@dev.gluster.com> BUG: 865 (Add locks recovery support in GlusterFS) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=865
-rw-r--r--libglusterfs/src/glusterfs.h15
-rw-r--r--xlators/cluster/afr/src/afr-common.c31
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c101
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h1
-rw-r--r--xlators/cluster/afr/src/afr.c3
-rw-r--r--xlators/cluster/afr/src/afr.h17
-rw-r--r--xlators/features/locks/src/Makefile.am2
-rw-r--r--xlators/features/locks/src/common.c9
-rw-r--r--xlators/features/locks/src/common.h14
-rw-r--r--xlators/features/locks/src/locks-mem-types.h1
-rw-r--r--xlators/features/locks/src/locks.h7
-rw-r--r--xlators/features/locks/src/posix.c256
-rw-r--r--xlators/features/locks/src/reservelk.c450
-rw-r--r--xlators/protocol/client/src/Makefile.am2
-rw-r--r--xlators/protocol/client/src/client-handshake.c65
-rw-r--r--xlators/protocol/client/src/client-helpers.c22
-rw-r--r--xlators/protocol/client/src/client-lk.c949
-rw-r--r--xlators/protocol/client/src/client-mem-types.h1
-rw-r--r--xlators/protocol/client/src/client.c2
-rw-r--r--xlators/protocol/client/src/client.h43
-rw-r--r--xlators/protocol/client/src/client3_1-fops.c172
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c22
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c9
23 files changed, 2120 insertions, 74 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index f107a8abfa4..9b5a0e99239 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -150,15 +150,28 @@ typedef enum {
GF_LK_GETLK = 0,
GF_LK_SETLK,
GF_LK_SETLKW,
+ GF_LK_RESLK_LCK,
+ GF_LK_RESLK_LCKW,
+ GF_LK_RESLK_UNLCK,
} glusterfs_lk_cmds_t;
typedef enum {
GF_LK_F_RDLCK = 0,
GF_LK_F_WRLCK,
- GF_LK_F_UNLCK
+ GF_LK_F_UNLCK,
+ GF_LK_RECLK,
} glusterfs_lk_types_t;
+typedef enum {
+ F_RESLK_LCK = 200,
+ F_RESLK_LCKW,
+ F_RESLK_UNLCK,
+} glusterfs_lk_recovery_cmds_t;
+
+typedef enum {
+ F_GETLK_FD = 250,
+} glusterfs_lk_rec_types_t;
typedef enum {
GF_LOCK_POSIX,
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 64e22c3229e..9d9f5d0414d 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1197,6 +1197,16 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)
fd_ctx->up_count = priv->up_count;
fd_ctx->down_count = priv->down_count;
+ fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->locked_on) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
INIT_LIST_HEAD (&fd_ctx->entries);
@@ -1426,6 +1436,9 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
if (fd_ctx->opened_on)
GF_FREE (fd_ctx->opened_on);
+ if (fd_ctx->locked_on)
+ GF_FREE (fd_ctx->locked_on);
+
GF_FREE (fd_ctx);
}
@@ -2298,8 +2311,9 @@ int32_t
afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct flock *lock)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
int child_index = -1;
@@ -2339,7 +2353,18 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
} else {
/* locking has succeeded on all nodes that are up */
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ ret = afr_mark_locked_nodes (this, local->fd,
+ local->cont.lk.locked_nodes);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not save locked nodes info in fdctx");
+
+ ret = afr_save_locked_fd (this, local->fd);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not save locked fd");
+
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
&local->cont.lk.ret_flock);
}
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index ee53d1d7bfb..de95a6c763b 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1678,3 +1678,104 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
return 0;
}
+
+int
+afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
+ unsigned char *locked_nodes)
+{
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ uint64_t tmp = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ afr_fd_ctx_set (this, fd);
+ if (ret < 0)
+ goto out;
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ GF_ASSERT (fdctx->locked_on);
+
+ memcpy (fdctx->locked_on, locked_nodes,
+ priv->child_count);
+
+out:
+ return ret;
+}
+
+static int
+__is_fd_saved (xlator_t *this, fd_t *fd)
+{
+ afr_locked_fd_t *locked_fd = NULL;
+ afr_private_t *priv = NULL;
+ int found = 0;
+
+ priv = this->private;
+
+ list_for_each_entry (locked_fd, &priv->saved_fds, list) {
+ if (locked_fd->fd == fd) {
+ found = 1;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static int
+__afr_save_locked_fd (xlator_t *this, fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
+ gf_afr_mt_locked_fd);
+ if (!locked_fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&locked_fd->list);
+
+ list_add_tail (&locked_fd->list, &priv->saved_fds);
+
+out:
+ return ret;
+}
+
+int
+afr_save_locked_fd (xlator_t *this, fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->mutex);
+ {
+ if (__is_fd_saved (this, fd)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fd=%p already saved", fd);
+ goto unlock;
+ }
+
+ ret = __afr_save_locked_fd (this, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fd=%p could not be saved");
+ goto unlock;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->mutex);
+
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index c1a86b8275b..31a80e8f57a 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -41,6 +41,7 @@ enum gf_afr_mem_types_ {
gf_afr_mt_loc_t,
gf_afr_mt_entry_name,
gf_afr_mt_pump_priv,
+ gf_afr_mt_locked_fd,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 09094bdbe58..69b281d973e 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -641,6 +641,9 @@ init (xlator_t *this)
priv->first_lookup = 1;
priv->root_inode = NULL;
+ pthread_mutex_init (&priv->mutex, NULL);
+ INIT_LIST_HEAD (&priv->saved_fds);
+
ret = 0;
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 26f2c989f4e..68b4a1e305e 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -85,6 +85,9 @@ typedef struct _afr_private {
struct _pump_private *pump_private; /* Set if we are loaded as pump */
int use_afr_in_pump;
+
+ pthread_mutex_t mutex;
+ struct list_head saved_fds; /* list of fds on which locks have succeeded */
} afr_private_t;
typedef struct {
@@ -262,6 +265,11 @@ typedef struct {
} afr_internal_lock_t;
+typedef struct _afr_locked_fd {
+ fd_t *fd;
+ struct list_head list;
+} afr_locked_fd_t;
+
typedef struct _afr_local {
unsigned int call_count;
unsigned int success_count;
@@ -619,6 +627,8 @@ typedef struct {
int hit, miss;
gf_boolean_t failed_over;
struct list_head entries; /* needed for readdir failover */
+
+ unsigned char *locked_on; /* which subvolumes locks have been successful */
} afr_fd_ctx_t;
@@ -655,6 +665,13 @@ int32_t
afr_notify (xlator_t *this, int32_t event,
void *data, ...);
+int
+afr_save_locked_fd (xlator_t *this, fd_t *fd);
+
+int
+afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
+ unsigned char *locked_nodes);
+
void
afr_set_lk_owner (call_frame_t *frame, xlator_t *this);
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
index d10b874befb..53dd3aa5da5 100644
--- a/xlators/features/locks/src/Makefile.am
+++ b/xlators/features/locks/src/Makefile.am
@@ -3,7 +3,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
locks_la_LDFLAGS = -module -avoidversion
-locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c
+locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c
locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = locks.h common.h locks-mem-types.h
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index b34cd97813a..9f2d11304df 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -103,6 +103,12 @@ fd_to_fdnum (fd_t *fd)
return ((unsigned long) fd);
}
+fd_t *
+fd_from_fdnum (posix_lock_t *lock)
+{
+ return ((fd_t *) lock->fd_num);
+}
+
int
__pl_inode_is_empty (pl_inode_t *pl_inode)
{
@@ -441,6 +447,9 @@ pl_inode_get (xlator_t *this, inode_t *inode)
INIT_LIST_HEAD (&pl_inode->dom_list);
INIT_LIST_HEAD (&pl_inode->ext_list);
INIT_LIST_HEAD (&pl_inode->rw_list);
+ INIT_LIST_HEAD (&pl_inode->reservelk_list);
+ INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
+ INIT_LIST_HEAD (&pl_inode->blocked_calls);
inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index d707294475f..c7d817f8da0 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -116,4 +116,18 @@ pl_trace_release (xlator_t *this, fd_t *fd);
unsigned long
fd_to_fdnum (fd_t *fd);
+fd_t *
+fd_from_fdnum (posix_lock_t *lock);
+
+int
+pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block);
+int
+reservelks_equal (posix_lock_t *l1, posix_lock_t *l2);
+
+int
+pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock, int can_block);
+int
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
index cf50240863b..f441f95cfa9 100644
--- a/xlators/features/locks/src/locks-mem-types.h
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -33,6 +33,7 @@ enum gf_locks_mem_types_ {
gf_locks_mt_pl_rw_req_t,
gf_locks_mt_posix_locks_private_t,
gf_locks_mt_pl_local_t,
+ gf_locks_mt_pl_fdctx_t,
gf_locks_mt_end
};
#endif
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 60474615e5f..483e3e6669e 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -125,6 +125,9 @@ struct __pl_inode {
struct list_head dom_list; /* list of domains */
struct list_head ext_list; /* list of fcntl locks */
struct list_head rw_list; /* list of waiting r/w requests */
+ struct list_head reservelk_list; /* list of reservelks */
+ struct list_head blocked_reservelks; /* list of blocked reservelks */
+ struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/
int mandatory; /* if mandatory locking is enabled */
inode_t *refkeeper; /* hold refs on an inode while locks are
@@ -150,4 +153,8 @@ typedef struct {
gf_boolean_t posixlk_count_req;
} pl_local_t;
+typedef struct {
+ struct list_head locks_list;
+} pl_fdctx_t;
+
#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 165280265ad..f085594243c 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -741,6 +741,194 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
return 0;
}
+static int
+__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd)
+{
+ int found = 0;
+ posix_lock_t *l = NULL;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ found = 1;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static posix_lock_t *
+lock_dup (posix_lock_t *lock)
+{
+ posix_lock_t *new_lock = NULL;
+
+ new_lock = new_posix_lock (&lock->user_flock, lock->transport,
+ lock->client_pid, lock->owner,
+ (fd_t *)lock->fd_num);
+ return new_lock;
+}
+
+static int
+__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
+ pl_fdctx_t *fdctx)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *duplock = NULL;
+ int ret = 0;
+
+ fdctx = GF_CALLOC (1, sizeof (*fdctx),
+ gf_locks_mt_pl_fdctx_t);
+ if (!fdctx) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&fdctx->locks_list);
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ duplock = lock_dup (l);
+ if (!duplock) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "Out of memory");
+ ret = -1;
+ break;
+ }
+
+ list_add_tail (&duplock->list, &fdctx->locks_list);
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int
+__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
+ pl_fdctx_t *fdctx)
+{
+ int ret = 0;
+
+ ret = __dup_locks_to_fdctx (pl_inode, fd, fdctx);
+ if (ret)
+ goto out;
+
+ ret = fd_ctx_set (fd, THIS, (uint64_t) (unsigned long)&fdctx);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "Failed to set fdctx");
+out:
+ return ret;
+
+}
+
+static void
+pl_mark_eol_lock (posix_lock_t *lock)
+{
+ lock->user_flock.l_type = GF_LK_RECLK;
+ return;
+}
+
+static posix_lock_t *
+__get_next_fdctx_lock (pl_fdctx_t *fdctx)
+{
+ posix_lock_t *lock = NULL;
+
+ GF_ASSERT (fdctx);
+
+ if (list_empty (&fdctx->locks_list)) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "fdctx lock list empty");
+ goto out;
+ }
+
+ lock = list_entry (&fdctx->locks_list, typeof (*lock),
+ list);
+
+ GF_ASSERT (lock);
+
+ list_del_init (&lock->list);
+
+out:
+ return lock;
+}
+
+static int
+__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock)
+{
+ posix_lock_t *lock = NULL;
+ int ret = 0;
+
+ GF_ASSERT (fdctx);
+
+ lock = __get_next_fdctx_lock (fdctx);
+ if (!lock) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "marking EOL in reqlock");
+ pl_mark_eol_lock (reqlock);
+ goto out;
+ }
+
+ reqlock->user_flock = lock->user_flock;
+
+out:
+ if (lock)
+ __destroy_lock (lock);
+
+ return ret;
+}
+static int
+pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode,
+ fd_t *fd, posix_lock_t *reqlock)
+{
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+ int ret = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (!__fd_has_locks (pl_inode, fd)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fd=%p has no active locks", fd);
+ ret = 0;
+ goto unlock;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "There are active locks on fd");
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ fdctx = (pl_fdctx_t *) tmp;
+ if (ret) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no fdctx -> copying all locks on fd");
+
+ ret = __copy_locks_to_fdctx (pl_inode, fd, fdctx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ goto unlock;
+ }
+
+ ret = __set_next_lock_fd (fdctx, reqlock);
+
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "fdctx present -> returning the next lock");
+ ret = __set_next_lock_fd (fdctx, reqlock);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not get next lock of fd");
+ goto unlock;
+ }
+ }
+ }
+
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return ret;
+
+}
int
pl_lk (call_frame_t *frame, xlator_t *this,
@@ -791,6 +979,68 @@ pl_lk (call_frame_t *frame, xlator_t *this,
switch (cmd) {
+ case F_RESLK_LCKW:
+ can_block = 1;
+
+ /* fall through */
+ case F_RESLK_LCK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
+ reqlock->frame = frame;
+ reqlock->this = this;
+
+ ret = pl_reserve_setlk (this, pl_inode, reqlock,
+ can_block);
+ if (ret < 0) {
+ if (can_block)
+ goto out;
+
+ op_ret = -1;
+ op_errno = -ret;
+ __destroy_lock (reqlock);
+ goto unwind;
+ }
+ /* Finally a getlk and return the call */
+ conf = pl_getlk (pl_inode, reqlock);
+ if (conf)
+ posix_lock_to_flock (conf, flock);
+ break;
+
+ case F_RESLK_UNLCK:
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_reserve_unlock (this, pl_inode, reqlock);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ __destroy_lock (reqlock);
+ goto unwind;
+
+ break;
+
+ case F_GETLK_FD:
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
+ GF_ASSERT (ret >= 0);
+
+ ret = pl_getlk_fd (this, pl_inode, fd, reqlock);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getting locks on fd failed");
+ op_ret = -1;
+ op_errno = ENOLCK;
+ goto unwind;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Replying with a lock on fd for healing");
+
+ posix_lock_to_flock (reqlock, flock);
+ __destroy_lock (reqlock);
+
+ break;
+
#if F_GETLK != F_GETLK64
case F_GETLK64:
#endif
@@ -816,6 +1066,12 @@ pl_lk (call_frame_t *frame, xlator_t *this,
#endif
case F_SETLK:
memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
+ ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock blocked due to conflicting reserve lock");
+ goto out;
+ }
ret = pl_setlk (this, pl_inode, reqlock,
can_block);
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
new file mode 100644
index 00000000000..c36484c46cc
--- /dev/null
+++ b/xlators/features/locks/src/reservelk.c
@@ -0,0 +1,450 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+void
+__delete_reserve_lock (posix_lock_t *lock)
+{
+ list_del (&lock->list);
+}
+
+void
+__destroy_reserve_lock (posix_lock_t *lock)
+{
+ FREE (lock);
+}
+
+/* Return true if the two reservelks have exactly same lock boundaries */
+int
+reservelks_equal (posix_lock_t *l1, posix_lock_t *l2)
+{
+ if ((l1->fl_start == l2->fl_start) &&
+ (l1->fl_end == l2->fl_end))
+ return 1;
+
+ return 0;
+}
+
+/* Determine if lock is grantable or not */
+static posix_lock_t *
+__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ xlator_t *this = NULL;
+ posix_lock_t *l = NULL;
+ posix_lock_t *ret_lock = NULL;
+
+ this = THIS;
+
+ if (list_empty (&pl_inode->reservelk_list)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No reservelks in list");
+ goto out;
+ }
+ list_for_each_entry (l, &pl_inode->reservelk_list, list){
+ if (reservelks_equal (lock, l)) {
+ ret_lock = l;
+ break;
+ }
+ }
+out:
+ return ret_lock;
+}
+
+static int
+__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2)
+{
+ return ((l1->owner == l2->owner));
+
+}
+
+static posix_lock_t *
+__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *l = NULL;
+
+ if (list_empty (&pl_inode->reservelk_list)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ "reservelk list empty");
+ return NULL;
+ }
+
+ list_for_each_entry (l, &pl_inode->reservelk_list, list) {
+ if (reservelks_equal (l, lock)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ "equal reservelk found");
+ break;
+ }
+ }
+
+ return l;
+}
+
+static int
+__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock)
+{
+ posix_lock_t *conf = NULL;
+ int ret = 0;
+
+ conf = __matching_reservelk (pl_inode, lock);
+ if (conf) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Matching reservelk found");
+ if (__same_owner_reservelk (lock, conf)) {
+ list_del_init (&conf->list);
+ gf_log (this->name, GF_LOG_TRACE,
+ "Removing the matching reservelk for setlk to progress");
+ FREE (conf);
+ ret = 0;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Conflicting reservelk found");
+ ret = 1;
+ }
+
+ }
+ return ret;
+
+}
+
+int
+pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock, int can_block)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (__reservelk_conflict (this, pl_inode, lock)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Found conflicting reservelk. Blocking until reservelk is unlocked.");
+ lock->blocked = can_block;
+ list_add_tail (&lock->list, &pl_inode->blocked_calls);
+ ret = -1;
+ goto unlock;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "no conflicting reservelk found. Call continuing");
+ ret = 0;
+
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ return ret;
+
+}
+
+
+/* Determines if lock can be granted and adds the lock. If the lock
+ * is blocking, adds it to the blocked_reservelks.
+ */
+static int
+__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ posix_lock_t *conf = NULL;
+ int ret = -EINVAL;
+
+ conf = __reservelk_grantable (pl_inode, lock);
+ if (conf){
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
+
+ list_add_tail (&lock->list, &pl_inode->blocked_reservelks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lock->owner,
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+
+ goto out;
+ }
+
+ list_add (&lock->list, &pl_inode->reservelk_list);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static posix_lock_t *
+find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode)
+{
+ posix_lock_t *l = NULL;
+ list_for_each_entry (l, &pl_inode->reservelk_list, list) {
+ if (reservelks_equal (l, lock))
+ return l;
+ }
+ return NULL;
+}
+
+/* Set F_UNLCK removes a lock which has the exact same lock boundaries
+ * as the UNLCK lock specifies. If such a lock is not found, returns invalid
+ */
+static posix_lock_t *
+__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
+{
+
+ posix_lock_t *conf = NULL;
+
+ conf = find_matching_reservelk (lock, pl_inode);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " Matching lock not found for unlock");
+ goto out;
+ }
+ __delete_reserve_lock (conf);
+ gf_log (this->name, GF_LOG_DEBUG,
+ " Matching lock found for unlock");
+
+out:
+ return conf;
+
+
+}
+
+static void
+__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+
+ list_del_init (&bl->list);
+
+ bl_ret = __lock_reservelk (this, pl_inode, bl, 1);
+
+ if (bl_ret == 0) {
+ list_add (&bl->list, granted);
+ }
+ }
+ return;
+}
+
+/* Grant all reservelks blocked on lock(s) */
+void
+grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode)
+{
+ struct list_head granted;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
+
+ INIT_LIST_HEAD (&granted);
+
+ if (list_empty (&pl_inode->blocked_reservelks)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No blocked locks to be granted");
+ return;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_reserve_locks (this, pl_inode, &granted);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, list) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lock->owner,
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock);
+ }
+
+}
+
+static void
+__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+
+ list_del_init (&bl->list);
+
+ bl_ret = pl_verify_reservelk (this, pl_inode, bl, bl->blocked);
+
+ if (bl_ret == 0) {
+ list_add_tail (&bl->list, granted);
+ }
+ }
+ return;
+}
+
+void
+grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode)
+{
+ struct list_head granted;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
+ fd_t *fd = NULL;
+
+ int can_block = 0;
+ int32_t cmd = 0;
+ int ret = 0;
+
+ if (list_empty (&pl_inode->blocked_calls)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No blocked lock calls to be granted");
+ return;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_lock_calls (this, pl_inode, &granted);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, list) {
+ fd = fd_from_fdnum (lock);
+
+ if (lock->blocked) {
+ can_block = 1;
+ cmd = F_SETLKW;
+ }
+ else
+ cmd = F_SETLK;
+
+ lock->blocked = 0;
+ ret = pl_setlk (this, pl_inode, lock, can_block);
+ if (ret == -1) {
+ if (can_block) {
+ pl_trace_block (this, lock->frame, fd, NULL,
+ cmd, &lock->user_flock, NULL);
+ continue;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
+ pl_trace_out (this, lock->frame, fd, NULL, cmd,
+ &lock->user_flock, -1, EAGAIN, NULL);
+ pl_update_refkeeper (this, fd->inode);
+ STACK_UNWIND_STRICT (lk, lock->frame, -1, EAGAIN, &lock->user_flock);
+ __destroy_lock (lock);
+ }
+ }
+
+ }
+
+}
+
+
+int
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *retlock = NULL;
+ int ret = -1;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ retlock = __reserve_unlock_lock (this, lock, pl_inode);
+ if (!retlock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Reservelk Unlock successful");
+ __destroy_reserve_lock (retlock);
+ ret = 0;
+ }
+out:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ grant_blocked_reserve_locks (this, pl_inode);
+ grant_blocked_lock_calls (this, pl_inode);
+
+ return ret;
+
+}
+
+int
+pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ int ret = -EINVAL;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ ret = __lock_reservelk (this, pl_inode, lock, can_block);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lock->owner,
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ else
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lock->owner,
+ lock->fl_start,
+ lock->fl_end);
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return ret;
+}
diff --git a/xlators/protocol/client/src/Makefile.am b/xlators/protocol/client/src/Makefile.am
index 007810e9901..f2dea68d70d 100644
--- a/xlators/protocol/client/src/Makefile.am
+++ b/xlators/protocol/client/src/Makefile.am
@@ -9,7 +9,7 @@ client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la
client_la_SOURCES = client.c client-helpers.c client3_1-fops.c \
- client-handshake.c client-callback.c
+ client-handshake.c client-callback.c client-lk.c
noinst_HEADERS = client.h client-mem-types.h
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 7511813d3cb..8b0c90ebc8c 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -343,14 +343,38 @@ unwind:
}
int
+client_notify_parents_child_up (xlator_t *this)
+{
+ xlator_list_t *parent = NULL;
+
+ /* As fuse is not 'parent' of any translator now, triggering its
+ CHILD_UP event is hacky in case client has only client protocol */
+ if (!this->parents && this->ctx && this->ctx->master) {
+ /* send notify to 'ctx->master' if it exists */
+ xlator_notify (this->ctx->master, GF_EVENT_CHILD_UP,
+ this->graph);
+ }
+
+ parent = this->parents;
+ while (parent) {
+ xlator_notify (parent->xlator, GF_EVENT_CHILD_UP,
+ this);
+ parent = parent->next;
+ }
+
+ return 0;
+}
+
+int
client_post_handshake (call_frame_t *frame, xlator_t *this)
{
clnt_conf_t *conf = NULL;
clnt_fd_ctx_t *tmp = NULL;
clnt_fd_ctx_t *fdctx = NULL;
- xlator_list_t *parent = NULL;
struct list_head reopen_head;
+ int count = 0;
+
if (!this || !this->private)
goto out;
@@ -366,34 +390,33 @@ client_post_handshake (call_frame_t *frame, xlator_t *this)
list_del_init (&fdctx->sfd_pos);
list_add_tail (&fdctx->sfd_pos, &reopen_head);
+ count++;
}
}
pthread_mutex_unlock (&conf->lock);
- list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) {
- list_del_init (&fdctx->sfd_pos);
+ /* Delay notifying CHILD_UP to parents
+ until all locks are recovered */
+ if (count > 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%d fds open - Delaying child_up until they are re-opened",
+ count);
+ client_save_number_fds (conf, count);
- if (fdctx->is_dir)
- protocol_client_reopendir (this, fdctx);
- else
- protocol_client_reopen (this, fdctx);
- }
+ list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) {
+ list_del_init (&fdctx->sfd_pos);
- /* As fuse is not 'parent' of any translator now, triggering its
- CHILD_UP event is hacky in case client has only client protocol */
- if (!this->parents && this->ctx && this->ctx->master) {
- /* send notify to 'ctx->master' if it exists */
- xlator_notify (this->ctx->master, GF_EVENT_CHILD_UP,
- this->graph);
- }
+ if (fdctx->is_dir)
+ protocol_client_reopendir (this, fdctx);
+ else
+ protocol_client_reopen (this, fdctx);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No open fds - notifying all parents child up");
+ client_notify_parents_child_up (this);
- parent = this->parents;
- while (parent) {
- xlator_notify (parent->xlator, GF_EVENT_CHILD_UP,
- this);
- parent = parent->next;
}
-
out:
return 0;
}
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index d8da60aa1ae..d1282d50c90 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -76,16 +76,26 @@ this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, clnt_fd_ctx_t *ctx)
ret = fd_ctx_get (file, this, &oldaddr);
if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): trying duplicate remote fd set. ",
- loc->path, loc->inode->ino);
+ if (loc)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (%"PRId64"): trying duplicate remote fd set. ",
+ loc->path, loc->inode->ino);
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%p: trying duplicate remote fd set. ",
+ file);
}
ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to set remote fd",
- loc->path, loc->inode->ino);
+ if (loc)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (%"PRId64"): failed to set remote fd",
+ loc->path, loc->inode->ino);
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%p: failed to set remote fd",
+ file);
}
out:
return;
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
new file mode 100644
index 00000000000..4ad7fc2b1ec
--- /dev/null
+++ b/xlators/protocol/client/src/client-lk.c
@@ -0,0 +1,949 @@
+/*
+ Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include "common-utils.h"
+#include "xlator.h"
+#include "client.h"
+
+static void
+__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock);
+
+static int
+client_send_recovery_lock (call_frame_t *frame, xlator_t *this,
+ client_posix_lock_t *lock);
+static void
+__dump_client_lock (client_posix_lock_t *lock)
+{
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "{fd=%lld}"
+ "{%s lk-owner:%"PRIu64" %"PRId64" - %"PRId64"}"
+ "{start=%"PRId64" end=%"PRId64"}",
+ (unsigned long long)lock->fd,
+ lock->fl_type == F_WRLCK ? "Write-Lock" : "Read-Lock",
+ lock->owner,
+ lock->user_flock.l_start,
+ lock->user_flock.l_len,
+ lock->fl_start,
+ lock->fl_end);
+}
+
+static int
+dump_client_locks_fd (clnt_fd_ctx_t *fdctx)
+{
+ client_posix_lock_t *lock = NULL;
+ int count = 0;
+
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ list_for_each_entry (lock, &fdctx->lock_list, list) {
+ __dump_client_lock (lock);
+ count++;
+ }
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ return count;
+
+}
+
+int
+dump_client_locks (inode_t *inode)
+{
+ fd_t *fd = NULL;
+ clnt_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+
+ int total_count = 0;
+ int locks_fd_count = 0;
+
+ this = THIS;
+ conf = this->private;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (fd, &inode->fd_list, inode_list) {
+ locks_fd_count = 0;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_get_ctx (fd, this);
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if (fdctx)
+ locks_fd_count = dump_client_locks_fd (fdctx);
+
+ total_count += locks_fd_count;
+ }
+
+ }
+ UNLOCK (&inode->lock);
+
+ return total_count;
+
+}
+
+static off_t
+__get_lock_length (off_t start, off_t end)
+{
+ if (end == LLONG_MAX)
+ return 0;
+ else
+ return (end - start + 1);
+}
+
+/* Add two locks */
+static client_posix_lock_t *
+add_locks (client_posix_lock_t *l1, client_posix_lock_t *l2)
+{
+ client_posix_lock_t *sum = NULL;
+
+ sum = GF_CALLOC (1, sizeof (*sum), gf_client_mt_clnt_lock_t);
+ if (!sum)
+ return NULL;
+
+ sum->fl_start = min (l1->fl_start, l2->fl_start);
+ sum->fl_end = max (l1->fl_end, l2->fl_end);
+
+ sum->user_flock.l_start = sum->fl_start;
+ sum->user_flock.l_len = __get_lock_length (sum->fl_start,
+ sum->fl_end);
+
+ return sum;
+}
+
+/* Return true if the locks have the same owner */
+static int
+same_owner (client_posix_lock_t *l1, client_posix_lock_t *l2)
+{
+ return ((l1->owner == l2->owner));
+}
+
+/* Return true if the locks overlap, false otherwise */
+static int
+locks_overlap (client_posix_lock_t *l1, client_posix_lock_t *l2)
+{
+ /*
+ Note:
+ FUSE always gives us absolute offsets, so no need to worry
+ about SEEK_CUR or SEEK_END
+ */
+
+ return ((l1->fl_end >= l2->fl_start) &&
+ (l2->fl_end >= l1->fl_start));
+}
+
+static void
+__delete_client_lock (client_posix_lock_t *lock)
+{
+ list_del_init (&lock->list);
+}
+
+/* Destroy a posix_lock */
+static void
+__destroy_client_lock (client_posix_lock_t *lock)
+{
+ free (lock);
+}
+
+/* Subtract two locks */
+struct _values {
+ client_posix_lock_t *locks[3];
+};
+
+/* {big} must always be contained inside {small} */
+static struct _values
+subtract_locks (client_posix_lock_t *big, client_posix_lock_t *small)
+{
+ struct _values v = { .locks = {0, 0, 0} };
+
+ if ((big->fl_start == small->fl_start) &&
+ (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t );
+ GF_ASSERT (v.locks[0]);
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_type = small->fl_type;
+ }
+ else if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* both edges lie inside big */
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+ v.locks[2] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[2]);
+
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+ v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start,
+ v.locks[0]->fl_end);
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ memcpy (v.locks[2], big, sizeof (client_posix_lock_t));
+ v.locks[2]->fl_start = small->fl_end + 1;
+ v.locks[2]->user_flock.l_start = small->fl_end + 1;
+ }
+ /* one edge coincides with big */
+ else if (small->fl_start == big->fl_start) {
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_start = small->fl_end + 1;
+ v.locks[0]->user_flock.l_start = small->fl_end + 1;
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ }
+ else if (small->fl_end == big->fl_end) {
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+ v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start,
+ v.locks[0]->fl_end);
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ }
+ else {
+ gf_log ("client-protocol", GF_LOG_ERROR,
+ "Unexpected case in subtract_locks. Please send "
+ "a bug report to gluster-devel@nongnu.org");
+ }
+
+ return v;
+}
+
+static void
+__delete_unlck_locks (clnt_fd_ctx_t *fdctx)
+{
+ client_posix_lock_t *l = NULL;
+ client_posix_lock_t *tmp = NULL;
+
+ list_for_each_entry_safe (l, tmp, &fdctx->lock_list, list) {
+ if (l->fl_type == F_UNLCK) {
+ __delete_client_lock (l);
+ __destroy_client_lock (l);
+ }
+ }
+}
+
+static void
+__insert_lock (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+ list_add_tail (&lock->list, &fdctx->lock_list);
+
+ return;
+}
+
+static void
+__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+ client_posix_lock_t *conf = NULL;
+ client_posix_lock_t *t = NULL;
+ client_posix_lock_t *sum = NULL;
+ int i = 0;
+ struct _values v = { .locks = {0, 0, 0} };
+
+ list_for_each_entry_safe (conf, t, &fdctx->lock_list, list) {
+ if (!locks_overlap (conf, lock))
+ continue;
+
+ if (same_owner (conf, lock)) {
+ if (conf->fl_type == lock->fl_type) {
+ sum = add_locks (lock, conf);
+
+ sum->fd = lock->fd;
+
+ __delete_client_lock (conf);
+ __destroy_client_lock (conf);
+
+ __destroy_client_lock (lock);
+ __insert_and_merge (fdctx, sum);
+
+ return;
+ } else {
+ sum = add_locks (lock, conf);
+
+ sum->fd = conf->fd;
+ sum->owner = conf->owner;
+
+ v = subtract_locks (sum, lock);
+
+ __delete_client_lock (conf);
+ __destroy_client_lock (conf);
+
+ __delete_client_lock (lock);
+ __destroy_client_lock (lock);
+
+ __destroy_client_lock (sum);
+
+ for (i = 0; i < 3; i++) {
+ if (!v.locks[i])
+ continue;
+
+ INIT_LIST_HEAD (&v.locks[i]->list);
+ __insert_and_merge (fdctx,
+ v.locks[i]);
+ }
+
+ __delete_unlck_locks (fdctx);
+ return;
+ }
+ }
+
+ if (lock->fl_type == F_UNLCK) {
+ continue;
+ }
+
+ if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
+ __insert_lock (fdctx, lock);
+ return;
+ }
+ }
+
+ /* no conflicts, so just insert */
+ if (lock->fl_type != F_UNLCK) {
+ __insert_lock (fdctx, lock);
+ } else {
+ __destroy_client_lock (lock);
+ }
+}
+
+static void
+client_setlk (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ __insert_and_merge (fdctx, lock);
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ return;
+}
+
+static void
+destroy_client_lock (client_posix_lock_t *lock)
+{
+ GF_FREE (lock);
+}
+
+int32_t
+delete_granted_locks_owner (fd_t *fd, uint64_t owner)
+{
+ clnt_fd_ctx_t *fdctx = NULL;
+ client_posix_lock_t *lock = NULL;
+ client_posix_lock_t *tmp = NULL;
+ xlator_t *this = NULL;
+
+ struct list_head delete_list;
+ int ret = 0;
+ int count = 0;
+
+ INIT_LIST_HEAD (&delete_list);
+ this = THIS;
+ fdctx = this_fd_get_ctx (fd, this);
+ if (!fdctx) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fdctx not valid");
+ ret = -1;
+ goto out;
+ }
+
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &fdctx->lock_list, list) {
+ if (lock->owner == owner) {
+ list_del_init (&lock->list);
+ list_add_tail (&lock->list, &delete_list);
+ count++;
+ }
+ }
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &delete_list, list) {
+ list_del_init (&lock->list);
+ destroy_client_lock (lock);
+ }
+
+/* FIXME: Need to actually print the locks instead of count */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Number of locks cleared=%d", count);
+
+out:
+ return ret;
+}
+
+int32_t
+delete_granted_locks_fd (clnt_fd_ctx_t *fdctx)
+{
+ client_posix_lock_t *lock = NULL;
+ client_posix_lock_t *tmp = NULL;
+ xlator_t *this = NULL;
+
+ struct list_head delete_list;
+ int ret = 0;
+ int count = 0;
+
+ INIT_LIST_HEAD (&delete_list);
+ this = THIS;
+
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ list_splice_init (&fdctx->lock_list, &delete_list);
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &delete_list, list) {
+ list_del_init (&lock->list);
+ count++;
+ destroy_client_lock (lock);
+ }
+
+ /* FIXME: Need to actually print the locks instead of count */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Number of locks cleared=%d", count);
+
+ return ret;
+}
+
+static void
+client_mark_bad_fd (fd_t *fd, clnt_fd_ctx_t *fdctx)
+{
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (fdctx)
+ fdctx->remote_fd = -1;
+ this_fd_set_ctx (fd, this, NULL, fdctx);
+}
+
+int32_t
+client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd)
+{
+ int ret = 0;
+
+ if (cmd == F_GETLK || cmd == F_GETLK64)
+ *gf_cmd = GF_LK_GETLK;
+ else if (cmd == F_SETLK || cmd == F_SETLK64)
+ *gf_cmd = GF_LK_SETLK;
+ else if (cmd == F_SETLKW || cmd == F_SETLKW64)
+ *gf_cmd = GF_LK_SETLKW;
+ else if (cmd == F_RESLK_LCK)
+ *gf_cmd = GF_LK_RESLK_LCK;
+ else if (cmd == F_RESLK_LCKW)
+ *gf_cmd = GF_LK_RESLK_LCKW;
+ else if (cmd == F_RESLK_UNLCK)
+ *gf_cmd = GF_LK_RESLK_UNLCK;
+ else
+ ret = -1;
+
+ return ret;
+
+}
+
+static client_posix_lock_t *
+new_client_lock (struct flock *flock, uint64_t owner,
+ int32_t cmd, fd_t *fd)
+{
+ client_posix_lock_t *new_lock = NULL;
+ xlator_t *this = NULL;
+
+
+ this = THIS;
+ new_lock = GF_CALLOC (1, sizeof (*new_lock),
+ gf_client_mt_clnt_lock_t);
+ if (!new_lock) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&new_lock->list);
+ new_lock->fd = fd;
+ memcpy (&new_lock->user_flock, flock, sizeof (struct flock));
+
+ new_lock->fl_type = flock->l_type;
+ new_lock->fl_start = flock->l_start;
+
+ if (flock->l_len == 0)
+ new_lock->fl_end = LLONG_MAX;
+ else
+ new_lock->fl_end = flock->l_start + flock->l_len - 1;
+
+ new_lock->owner = owner;
+ new_lock->cmd = cmd; /* Not really useful */
+
+
+out:
+ return new_lock;
+}
+
+void
+client_save_number_fds (clnt_conf_t *conf, int count)
+{
+ LOCK (&conf->rec_lock);
+ {
+ conf->reopen_fd_count = count;
+ }
+ UNLOCK (&conf->rec_lock);
+}
+
+int
+client_add_lock_for_recovery (fd_t *fd, struct flock *flock, uint64_t owner,
+ int32_t cmd)
+{
+ clnt_fd_ctx_t *fdctx = NULL;
+ xlator_t *this = NULL;
+ client_posix_lock_t *lock = NULL;
+ clnt_conf_t *conf = NULL;
+
+ int ret = 0;
+
+ this = THIS;
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_get_ctx (fd, this);
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if (!fdctx) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get fd context. Marking as bad fd.");
+ ret = -EBADFD;
+ goto out;
+ }
+
+ lock = new_client_lock (flock, owner, cmd, fd);
+ if (!lock) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ client_setlk (fdctx, lock);
+
+out:
+ return ret;
+
+}
+
+static int
+construct_reserve_unlock (struct flock *lock, call_frame_t *frame,
+ client_posix_lock_t *client_lock)
+{
+ GF_ASSERT (lock);
+ GF_ASSERT (frame);
+ GF_ASSERT (frame->root->lk_owner);
+
+ lock->l_type = F_UNLCK;
+ lock->l_start = 0;
+ lock->l_whence = SEEK_SET;
+ lock->l_len = 0; /* Whole file */
+ lock->l_pid = (uint64_t)frame->root;
+
+ frame->root->lk_owner = client_lock->owner;
+
+ return 0;
+}
+
+static int
+construct_reserve_lock (client_posix_lock_t *client_lock, call_frame_t *frame,
+ struct flock *lock)
+{
+ GF_ASSERT (client_lock);
+
+ memcpy (lock, &(client_lock->user_flock), sizeof (struct flock));
+
+ frame->root->lk_owner = client_lock->owner;
+
+ return 0;
+}
+
+uint64_t
+decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf)
+{
+ uint64_t fd_count = 0;
+
+ LOCK (&conf->rec_lock);
+ {
+ fd_count = --(conf->reopen_fd_count);
+ }
+ UNLOCK (&conf->rec_lock);
+
+ if (fd_count == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "All locks healed on the last fd - notifying CHILDUP");
+ client_notify_parents_child_up (this);
+ }
+
+ return fd_count;
+}
+
+int32_t
+client_remove_reserve_lock_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct flock *lock)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+
+ uint64_t fd_count = 0;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ /* TODO: critical error describing recovery command
+ and blanket on ops on fd */
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Lock recovery failed with error msg=%s",
+ strerror(op_errno));
+ goto cleanup;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Removing reserve lock was successful.");
+
+cleanup:
+ frame->local = NULL;
+ client_mark_bad_fd (local->client_lock->fd,
+ local->fdctx);
+ destroy_client_lock (local->client_lock);
+ client_local_wipe (local);
+ STACK_DESTROY (frame->root);
+ fd_count = decrement_reopen_fd_count (this, conf);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Need to attempt lock recovery on %lld open fds",
+ (unsigned long long) fd_count);
+ return 0;
+}
+
+static void
+client_remove_reserve_lock (xlator_t *this, call_frame_t *frame,
+ client_posix_lock_t *lock)
+{
+ struct flock unlock;
+ clnt_local_t *local = NULL;
+
+ local = frame->local;
+ construct_reserve_unlock (&unlock, frame, lock);
+
+ STACK_WIND (frame, client_remove_reserve_lock_cbk,
+ this, this->fops->lk,
+ lock->fd, F_RESLK_UNLCK, &unlock);
+}
+
+static client_posix_lock_t *
+get_next_recovery_lock (xlator_t *this, clnt_local_t *local)
+{
+ client_posix_lock_t *lock = NULL;
+
+ pthread_mutex_lock (&local->mutex);
+ {
+ if (list_empty (&local->lock_list)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lock-list empty");
+ goto unlock;
+ }
+
+ lock = list_entry ((local->lock_list).next, typeof (*lock), list);
+ list_del_init (&lock->list);
+ }
+unlock:
+ pthread_mutex_unlock (&local->mutex);
+
+ return lock;
+
+}
+
+int32_t
+client_reserve_lock_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct flock *lock)
+{
+
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+
+ uint64_t fd_count = 0;
+
+ local = frame->local;
+ conf = this->private;
+
+ /* Got the reserve lock. Check if lock is grantable and proceed
+ with the real lk call */
+
+ if (op_ret >= 0) {
+ /* Lock is grantable if flock reflects a successful getlk() call*/
+ if (lock->l_type == F_UNLCK && lock->l_pid) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Got the reservelk, but the lock is not grantable. ");
+ client_remove_reserve_lock (this, frame, local->client_lock);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Reserve Lock succeeded");
+ client_send_recovery_lock (frame, this, local->client_lock);
+ goto out;
+ }
+
+ /* Somebody else has a reserve lk. Lock conflict detected.
+ Mark fd as bad */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Reservelk OP failed. Aborting lock recovery and marking bad fd");
+
+ client_mark_bad_fd (local->client_lock->fd,
+ local->fdctx);
+ destroy_client_lock (local->client_lock);
+ frame->local = NULL;
+ client_local_wipe (local);
+ STACK_DESTROY (frame->root);
+ fd_count = decrement_reopen_fd_count (this, conf);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Need to attempt lock recovery on %lld open fds",
+ (unsigned long long) fd_count);
+
+out:
+ return 0;
+}
+
+int32_t
+client_recovery_lock_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct flock *lock)
+{
+ clnt_local_t *local = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_conf_t *conf = NULL;
+ client_posix_lock_t *next_lock = NULL;
+
+ struct flock reserve_flock;
+ uint64_t fd_count = 0;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ /* TODO: critical error describing recovery command
+ and blanket on ops on fd */
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Lock recovery failed with error msg=%s",
+ strerror(op_errno));
+
+ client_mark_bad_fd (local->client_lock->fd,
+ local->fdctx);
+ goto cleanup;
+
+ /* Lock recovered. Continue with reserve lock for next lock */
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lock recovered successfully - Continuing with next lock.");
+
+ next_lock = get_next_recovery_lock (this, local);
+ if (!next_lock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "All locks recovered on fd");
+ goto cleanup;
+ }
+
+ construct_reserve_lock (next_lock, frame, &reserve_flock);
+ local->fdctx = fdctx;
+ local->client_lock = next_lock;
+
+ STACK_WIND (frame, client_reserve_lock_cbk,
+ this, this->fops->lk,
+ next_lock->fd, F_RESLK_LCK, &reserve_flock);
+ goto out;
+
+ }
+
+cleanup:
+ frame->local = NULL;
+ client_local_wipe (local);
+ if (local->client_lock)
+ destroy_client_lock (local->client_lock);
+ STACK_DESTROY (frame->root);
+ fd_count = decrement_reopen_fd_count (this, conf);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Need to attempt lock recovery on %lld open fds",
+ (unsigned long long) fd_count);
+
+out:
+ return 0;
+}
+
+static int
+client_send_recovery_lock (call_frame_t *frame, xlator_t *this,
+ client_posix_lock_t *lock)
+{
+
+ frame->root->lk_owner = lock->owner;
+
+ /* Send all locks as F_SETLK to prevent the frame
+ from blocking if there is a conflict */
+
+ STACK_WIND (frame, client_recovery_lock_cbk,
+ this, this->fops->lk,
+ lock->fd, F_SETLK,
+ &(lock->user_flock));
+
+ return 0;
+}
+
+static int
+client_lockrec_init (clnt_fd_ctx_t *fdctx, clnt_local_t *local)
+{
+
+ INIT_LIST_HEAD (&local->lock_list);
+ pthread_mutex_init (&local->mutex, NULL);
+
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ list_splice_init (&fdctx->lock_list, &local->lock_list);
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ return 0;
+}
+
+int
+client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx)
+{
+ call_frame_t *frame = NULL;
+ clnt_local_t *local = NULL;
+ client_posix_lock_t *lock = NULL;
+
+ struct flock reserve_flock;
+ int ret = 0;
+
+ local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ client_lockrec_init (fdctx, local);
+
+ lock = get_next_recovery_lock (this, local);
+ if (!lock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No locks on fd");
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ construct_reserve_lock (lock, frame, &reserve_flock);
+
+ local->fdctx = fdctx;
+ local->client_lock = lock;
+
+ STACK_WIND (frame, client_reserve_lock_cbk,
+ this, this->fops->lk,
+ lock->fd, F_RESLK_LCK, &reserve_flock);
+
+out:
+ return ret;
+
+
+}
+
+int32_t
+client_dump_locks (char *name, inode_t *inode,
+ dict_t *dict)
+{
+ int ret = 0;
+ char dict_string[256];
+
+ ret = dump_client_locks (inode);
+ snprintf (dict_string, 256, "%d locks dumped in log file", ret);
+
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "trusted.glusterfs.clientlk-dump", dict_string);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "Could not set dict with %s", CLIENT_DUMP_LOCKS);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+is_client_dump_locks_cmd (char *name)
+{
+ int ret = 0;
+
+ if (strcmp (name, CLIENT_DUMP_LOCKS) == 0)
+ ret = 1;
+
+ return ret;
+}
diff --git a/xlators/protocol/client/src/client-mem-types.h b/xlators/protocol/client/src/client-mem-types.h
index 638e537d116..50015b18fd1 100644
--- a/xlators/protocol/client/src/client-mem-types.h
+++ b/xlators/protocol/client/src/client-mem-types.h
@@ -28,6 +28,7 @@ enum gf_client_mem_types_ {
gf_client_mt_clnt_local_t,
gf_client_mt_clnt_req_buf_t,
gf_client_mt_clnt_fdctx_t,
+ gf_client_mt_clnt_lock_t,
gf_client_mt_end,
};
#endif /* __CLIENT_MEM_TYPES_H__ */
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 726a9dad792..b703b88f4b7 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -1865,6 +1865,8 @@ init (xlator_t *this)
pthread_mutex_init (&conf->lock, NULL);
INIT_LIST_HEAD (&conf->saved_fds);
+ LOCK_INIT (&conf->rec_lock);
+
this->private = conf;
/* If it returns -1, then its a failure, if it returns +1 we need
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index 84940de5f84..a31873a027b 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -33,7 +33,7 @@
/* FIXME: Needs to be defined in a common file */
#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
-
+#define CLIENT_DUMP_LOCKS "trusted.glusterfs.clientlk-dump"
struct clnt_options {
char *remote_subvolume;
int ping_timeout;
@@ -54,6 +54,10 @@ typedef struct clnt_conf {
rpc_clnt_prog_t *mgmt;
rpc_clnt_prog_t *handshake;
rpc_clnt_prog_t *dump;
+
+ uint64_t reopen_fd_count; /* Count of fds reopened after a
+ connection is established */
+ gf_lock_t rec_lock;
} clnt_conf_t;
typedef struct _client_fd_ctx {
@@ -68,8 +72,24 @@ typedef struct _client_fd_ctx {
char released;
int32_t flags;
int32_t wbflags;
+
+ pthread_mutex_t mutex;
+ struct list_head lock_list; /* List of all granted locks on this fd */
} clnt_fd_ctx_t;
+typedef struct _client_posix_lock {
+ fd_t *fd; /* The fd on which the lk operation was made */
+
+ struct flock user_flock; /* the flock supplied by the user */
+ off_t fl_start;
+ off_t fl_end;
+ short fl_type;
+ int32_t cmd; /* the cmd for the lock call */
+ uint64_t owner; /* lock owner from fuse */
+
+ struct list_head list; /* reference used to add to the fdctx list of locks */
+} client_posix_lock_t;
+
typedef struct client_local {
loc_t loc;
loc_t loc2;
@@ -79,6 +99,12 @@ typedef struct client_local {
uint32_t wbflags;
struct iobref *iobref;
fop_cbk_fn_t op;
+
+ client_posix_lock_t *client_lock;
+ uint64_t owner;
+ int32_t cmd;
+ struct list_head lock_list;
+ pthread_mutex_t mutex;
} clnt_local_t;
typedef struct client_args {
@@ -138,6 +164,17 @@ int unserialize_rsp_direntp (struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries
int clnt_readdir_rsp_cleanup (gfs3_readdir_rsp *rsp);
int clnt_readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp);
-
-
+int client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx);
+int32_t delete_granted_locks_owner (fd_t *fd, uint64_t owner);
+int client_add_lock_for_recovery (fd_t *fd, struct flock *flock, uint64_t owner,
+ int32_t cmd);
+uint64_t decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf);
+int32_t delete_granted_locks_fd (clnt_fd_ctx_t *fdctx);
+int32_t client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd);
+void client_save_number_fds (clnt_conf_t *conf, int count);
+int dump_client_locks (inode_t *inode);
+int client_notify_parents_child_up (xlator_t *this);
+int32_t is_client_dump_locks_cmd (char *name);
+int32_t client_dump_locks (char *name, inode_t *inode,
+ dict_t *dict);
#endif /* !_CLIENT_H */
diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c
index 69f2646978d..c3add8fd3ba 100644
--- a/xlators/protocol/client/src/client3_1-fops.c
+++ b/xlators/protocol/client/src/client3_1-fops.c
@@ -313,6 +313,7 @@ client3_1_open_cbk (struct rpc_req *req, struct iovec *iov, int count,
fdctx->wbflags = local->wbflags;
INIT_LIST_HEAD (&fdctx->sfd_pos);
+ INIT_LIST_HEAD (&fdctx->lock_list);
this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
@@ -605,10 +606,14 @@ client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
call_frame_t *frame = NULL;
+ clnt_local_t *local = NULL;
+ xlator_t *this = NULL;
gf_common_rsp rsp = {0,};
int ret = 0;
frame = myframe;
+ this = THIS;
+ local = frame->local;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
@@ -623,6 +628,18 @@ client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
+ if (rsp.op_ret >= 0) {
+ /* Delete all saved locks of the owner issuing flush */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Attempting to delete locks of owner=%llu",
+ (long long unsigned) local->owner);
+ delete_granted_locks_owner (local->fd, local->owner);
+ }
+
+ frame->local = NULL;
+ if (local)
+ client_local_wipe (local);
+
out:
STACK_UNWIND_STRICT (flush, frame, rsp.op_ret,
gf_error_to_errno (rsp.op_errno));
@@ -1442,6 +1459,7 @@ client3_1_create_cbk (struct rpc_req *req, struct iovec *iov, int count,
fdctx->flags = local->flags;
INIT_LIST_HEAD (&fdctx->sfd_pos);
+ INIT_LIST_HEAD (&fdctx->lock_list);
this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
@@ -1506,12 +1524,14 @@ int
client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
- call_frame_t *frame = NULL;
- struct flock lock = {0,};
- gfs3_lk_rsp rsp = {0,};
- int ret = 0;
+ call_frame_t *frame = NULL;
+ clnt_local_t *local = NULL;
+ struct flock lock = {0,};
+ gfs3_lk_rsp rsp = {0,};
+ int ret = 0;
frame = myframe;
+ local = frame->local;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
@@ -1531,6 +1551,20 @@ client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count,
gf_flock_to_flock (&rsp.flock, &lock);
}
+ /* Save the lock to the client lock cache to be able
+ to recover in the case of server reboot.*/
+ if (local->cmd == F_SETLK || local->cmd == F_SETLKW) {
+ ret = client_add_lock_for_recovery (local->fd, &lock,
+ local->owner, local->cmd);
+ if (ret < 0) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ }
+ }
+
+ frame->local = NULL;
+ client_local_wipe (local);
+
out:
STACK_UNWIND_STRICT (lk, frame, rsp.op_ret,
gf_error_to_errno (rsp.op_errno), &lock);
@@ -1777,6 +1811,7 @@ client3_1_opendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
fdctx->is_dir = 1;
INIT_LIST_HEAD (&fdctx->sfd_pos);
+ INIT_LIST_HEAD (&fdctx->lock_list);
this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
@@ -2014,12 +2049,14 @@ int
client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
- int32_t ret = -1;
- gfs3_open_rsp rsp = {0,};
- clnt_local_t *local = NULL;
- clnt_conf_t *conf = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- call_frame_t *frame = NULL;
+ int32_t ret = -1;
+ gfs3_open_rsp rsp = {0,};
+ int attempt_lock_recovery = _gf_false;
+ uint64_t fd_count = 0;
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ call_frame_t *frame = NULL;
frame = myframe;
local = frame->local;
@@ -2052,6 +2089,7 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
if (!fdctx->released) {
list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
+ attempt_lock_recovery = _gf_true;
fdctx = NULL;
}
}
@@ -2060,6 +2098,20 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
}
}
+ if (attempt_lock_recovery) {
+ ret = client_attempt_lock_recovery (frame->this, local->fdctx);
+ if (ret < 0)
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "No locks on fd to recover");
+ else {
+ fd_count = decrement_reopen_fd_count (frame->this, conf);
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "Need to attempt lock recovery on %lld open fds",
+ (unsigned long long) fd_count);
+
+ }
+ }
+
out:
if (fdctx)
client_fdctx_destroy (frame->this, fdctx);
@@ -2380,6 +2432,9 @@ client3_1_release (call_frame_t *frame, xlator_t *this,
if (remote_fd != -1) {
req.fd = remote_fd;
req.gfs_id = GFS3_OP_RELEASE;
+
+ delete_granted_locks_fd (fdctx);
+
ret = client_submit_request (this, &req, frame, conf->fops,
GFS3_OP_RELEASE,
client3_1_release_cbk, NULL,
@@ -3468,7 +3523,6 @@ unwind:
}
-
int32_t
client3_1_flush (call_frame_t *frame, xlator_t *this,
void *data)
@@ -3477,8 +3531,9 @@ client3_1_flush (call_frame_t *frame, xlator_t *this,
gfs3_flush_req req = {0,};
clnt_fd_ctx_t *fdctx = NULL;
clnt_conf_t *conf = NULL;
+ clnt_local_t *local = NULL;
int op_errno = ESTALE;
- int ret = 0;
+ int ret = 0;
if (!frame || !this || !data)
goto unwind;
@@ -3507,6 +3562,21 @@ client3_1_flush (call_frame_t *frame, xlator_t *this,
goto unwind;
}
+ conf = this->private;
+
+ local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of Memory");
+ STACK_UNWIND (frame, -1, ENOMEM);
+ return 0;
+
+ }
+
+ local->fd = fd_ref (args->fd);
+ local->owner = frame->root->lk_owner;
+ frame->local = local;
+
req.fd = fdctx->remote_fd;
req.gfs_id = GFS3_OP_FLUSH;
@@ -4012,16 +4082,23 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this,
clnt_conf_t *conf = NULL;
clnt_args_t *args = NULL;
gfs3_getxattr_req req = {0,};
+ dict_t *dict = NULL;
int ret = 0;
+ int32_t op_ret = 0;
int op_errno = ESTALE;
- if (!frame || !this || !data)
+ if (!frame || !this || !data) {
+ op_ret = -1;
+ op_errno = 0;
goto unwind;
-
+ }
args = data;
- if (!(args->loc && args->loc->inode))
+ if (!(args->loc && args->loc->inode)) {
+ op_ret = -1;
+ op_errno = EINVAL;
goto unwind;
+ }
memcpy (req.gfid, args->loc->inode->gfid, 16);
req.namelen = 1; /* Use it as a flag */
@@ -4035,19 +4112,42 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this,
conf = this->private;
+ if (args && args->name) {
+ if (is_client_dump_locks_cmd ((char *)args->name)) {
+ ret = client_dump_locks ((char *)args->name,
+ args->loc->inode,
+ dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Client dump locks failed");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ GF_ASSERT (dict);
+ op_ret = 0;
+ op_errno = 0;
+ goto unwind;
+ }
+ }
+
ret = client_submit_request (this, &req, frame, conf->fops,
GFS3_OP_GETXATTR,
client3_1_getxattr_cbk, NULL,
xdr_from_getxattr_req, NULL, 0,
NULL, 0, NULL);
if (ret) {
+ op_ret = -1;
op_errno = ENOTCONN;
goto unwind;
}
return 0;
unwind:
- STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, NULL);
+ if (dict)
+ dict_unref (dict);
+
return 0;
}
@@ -4242,20 +4342,28 @@ int32_t
client3_1_lk (call_frame_t *frame, xlator_t *this,
void *data)
{
- clnt_args_t *args = NULL;
- gfs3_lk_req req = {0,};
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- int op_errno = ESTALE;
- int ret = 0;
+ clnt_args_t *args = NULL;
+ gfs3_lk_req req = {0,};
+ int32_t gf_cmd = 0;
+ int32_t gf_type = 0;
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ int op_errno = ESTALE;
+ int ret = 0;
if (!frame || !this || !data)
goto unwind;
args = data;
conf = this->private;
+ local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_errno = ENOMEM;
+ goto unwind;
+ }
pthread_mutex_lock (&conf->lock);
{
@@ -4278,16 +4386,11 @@ client3_1_lk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
- if (args->cmd == F_GETLK || args->cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (args->cmd == F_SETLK || args->cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
+ ret = client_cmd_to_gf_cmd (args->cmd, &gf_cmd);
+ if (ret) {
+ op_errno = EINVAL;
gf_log (this->name, GF_LOG_DEBUG,
"Unknown cmd (%d)!", gf_cmd);
- goto unwind;
}
switch (args->flock->l_type) {
@@ -4302,6 +4405,11 @@ client3_1_lk (call_frame_t *frame, xlator_t *this,
break;
}
+ local->owner = frame->root->lk_owner;
+ local->cmd = args->cmd;
+ local->fd = fd_ref (args->fd);
+ frame->local = local;
+
req.fd = fdctx->remote_fd;
req.cmd = gf_cmd;
req.type = gf_type;
diff --git a/xlators/protocol/legacy/client/src/client-protocol.c b/xlators/protocol/legacy/client/src/client-protocol.c
index 57cfbc73e33..ebb4e6fcf96 100644
--- a/xlators/protocol/legacy/client/src/client-protocol.c
+++ b/xlators/protocol/legacy/client/src/client-protocol.c
@@ -145,16 +145,26 @@ this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, client_fd_ctx_t *ctx)
ret = fd_ctx_get (file, this, &oldaddr);
if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): trying duplicate remote fd set. ",
- loc->path, loc->inode->ino);
+ if (loc)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (%"PRId64"): trying duplicate remote fd set. ",
+ loc->path, loc->inode->ino);
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%p: trying duplicate remote fd set. ",
+ file);
}
ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to set remote fd",
- loc->path, loc->inode->ino);
+ if (loc)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (%"PRId64"): failed to set remote fd",
+ loc->path, loc->inode->ino);
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%p: failed to set remote fd",
+ file);
}
out:
return;
diff --git a/xlators/protocol/server/src/server3_1-fops.c b/xlators/protocol/server/src/server3_1-fops.c
index a0276ad6bb4..9c0ac90cdf9 100644
--- a/xlators/protocol/server/src/server3_1-fops.c
+++ b/xlators/protocol/server/src/server3_1-fops.c
@@ -4851,6 +4851,15 @@ server_lk (rpcsvc_request_t *req)
case GF_LK_SETLKW:
state->cmd = F_SETLKW;
break;
+ case GF_LK_RESLK_LCK:
+ state->cmd = F_RESLK_LCK;
+ break;
+ case GF_LK_RESLK_LCKW:
+ state->cmd = F_RESLK_LCKW;
+ break;
+ case GF_LK_RESLK_UNLCK:
+ state->cmd = F_RESLK_UNLCK;
+ break;
}
gf_flock_to_flock (&args.flock, &state->flock);