summaryrefslogtreecommitdiffstats
path: root/xlators/features/locks/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/features/locks/src')
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c424
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c1414
-rw-r--r--xlators/features/locks/src/common.h179
-rw-r--r--xlators/features/locks/src/entrylk.c848
-rw-r--r--xlators/features/locks/src/inodelk.c825
-rw-r--r--xlators/features/locks/src/internal.c896
-rw-r--r--xlators/features/locks/src/locks-mem-types.h29
-rw-r--r--xlators/features/locks/src/locks.h195
-rw-r--r--xlators/features/locks/src/posix.c3051
-rw-r--r--xlators/features/locks/src/reservelk.c443
12 files changed, 6432 insertions, 1965 deletions
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
index ec4a953eb..0f79731b4 100644
--- a/xlators/features/locks/src/Makefile.am
+++ b/xlators/features/locks/src/Makefile.am
@@ -1,15 +1,18 @@
xlator_LTLIBRARIES = locks.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-locks_la_LDFLAGS = -module -avoidversion
+locks_la_LDFLAGS = -module -avoid-version
-locks_la_SOURCES = common.c posix.c internal.c
-locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \
+ clear.c
+locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = locks.h common.h
+noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
@@ -17,4 +20,4 @@ uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/posix-locks.so
install-data-hook:
- ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so \ No newline at end of file
+ ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
new file mode 100644
index 000000000..124b9ad0f
--- /dev/null
+++ b/xlators/features/locks/src/clear.c
@@ -0,0 +1,424 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+#include "common.h"
+#include "statedump.h"
+#include "clear.h"
+
+int
+clrlk_get_kind (char *kind)
+{
+ char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted",
+ "all"};
+ int ret_kind = CLRLK_KIND_MAX;
+ int i = 0;
+
+ for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) {
+ if (!strcmp (clrlk_kinds[i], kind)) {
+ ret_kind = i;
+ break;
+ }
+ }
+
+ return ret_kind;
+}
+
+int
+clrlk_get_type (char *type)
+{
+ char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"};
+ int ret_type = CLRLK_TYPE_MAX;
+ int i = 0;
+
+ for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) {
+ if (!strcmp (clrlk_types[i], type)) {
+ ret_type = i;
+ break;
+ }
+ }
+
+ return ret_type;
+}
+
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range)
+{
+ int ret = -1;
+
+ if (!chk_range)
+ goto out;
+
+ if (!range_str) {
+ ret = 0;
+ *chk_range = _gf_false;
+ goto out;
+ }
+
+ if (sscanf (range_str, "%hd,%"PRId64"-""%"PRId64, &ulock->l_whence,
+ &ulock->l_start, &ulock->l_len) != 3) {
+ goto out;
+ }
+
+ ret = 0;
+ *chk_range = _gf_true;
+out:
+ return ret;
+}
+
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args)
+{
+ char *opts = NULL;
+ char *cur = NULL;
+ char *tok = NULL;
+ char *sptr = NULL;
+ char *free_ptr = NULL;
+ char kw[KW_MAX] = {[KW_TYPE] = 't',
+ [KW_KIND] = 'k',
+ };
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (cmd);
+ free_ptr = opts = GF_CALLOC (1, strlen (cmd), gf_common_mt_char);
+ if (!opts)
+ goto out;
+
+ if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1) {
+ ret = -1;
+ goto out;
+ }
+
+ /*clr_lk_prefix.ttype.kkind.args, args - type specific*/
+ cur = opts;
+ for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr));
+ cur = NULL, i++) {
+ if (tok[0] != kw[i]) {
+ ret = -1;
+ goto out;
+ }
+ if (i == KW_TYPE)
+ args->type = clrlk_get_type (tok+1);
+ if (i == KW_KIND)
+ args->kind = clrlk_get_kind (tok+1);
+ }
+
+ if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX))
+ goto out;
+
+ /*optional args, neither range nor basename can 'legally' contain
+ * "/" in them*/
+ tok = strtok_r (NULL, "/", &sptr);
+ if (tok)
+ args->opts = gf_strdup (tok);
+
+ ret = 0;
+out:
+ GF_FREE (free_ptr);
+ return ret;
+}
+
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno)
+{
+ posix_lock_t *plock = NULL;
+ posix_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list,
+ list) {
+ if ((plock->blocked &&
+ !(args->kind & CLRLK_BLOCKED)) ||
+ (!plock->blocked &&
+ !(args->kind & CLRLK_GRANTED)))
+ continue;
+
+ if (chk_range &&
+ (plock->user_flock.l_whence != ulock.l_whence
+ || plock->user_flock.l_start != ulock.l_start
+ || plock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ list_del_init (&plock->list);
+ if (plock->blocked) {
+ bcount++;
+ pl_trace_out (this, plock->frame, NULL, NULL,
+ F_SETLKW, &plock->user_flock,
+ -1, EAGAIN, NULL);
+
+ STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN,
+ &plock->user_flock, NULL);
+
+ } else {
+ gcount++;
+ }
+ GF_FREE (plock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ grant_blocked_locks (this, pl_inode);
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/* Returns 0 on success and -1 on failure */
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_inode_lock_t *ilock = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks,
+ blocked_locks) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ bcount++;
+ list_del_init (&ilock->blocked_locks);
+ list_add (&ilock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) {
+ list_del_init (&ilock->blocked_locks);
+ pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN,
+ ilock->volume);
+ STACK_UNWIND_STRICT (inodelk, ilock->frame, -1,
+ EAGAIN, NULL);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list,
+ list) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ gcount++;
+ list_del_init (&ilock->list);
+ list_add (&ilock->list, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ilock, tmp, &released, list) {
+ list_del_init (&ilock->list);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ ret = 0;
+out:
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/* Returns 0 on success and -1 on failure */
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_entry_lock_t *elock = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ int bcount = 0;
+ int gcount = 0;
+ int ret = -1;
+ struct list_head removed;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ bcount++;
+
+ list_del_init (&elock->blocked_locks);
+ list_add_tail (&elock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (elock, tmp, &released, blocked_locks) {
+ list_del_init (&elock->blocked_locks);
+ entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type,
+ -1, EAGAIN);
+ STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL);
+ GF_FREE ((char *) elock->basename);
+ GF_FREE (elock->connection_id);
+ GF_FREE (elock);
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ INIT_LIST_HEAD (&removed);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->entrylk_list,
+ domain_list) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ gcount++;
+ list_del_init (&elock->domain_list);
+ list_add_tail (&elock->domain_list, &removed);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (elock, tmp, &removed, domain_list) {
+ grant_blocked_entry_locks (this, pl_inode, elock, dom);
+ }
+
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno)
+{
+ pl_dom_list_t *dom = NULL;
+ int ret = -1;
+ int tmp_bcount = 0;
+ int tmp_gcount = 0;
+
+ if (list_empty (&pl_inode->dom_list)) {
+ ret = 0;
+ goto out;
+ }
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ tmp_bcount = tmp_gcount = 0;
+
+ switch (args->type)
+ {
+ case CLRLK_INODE:
+ ret = clrlk_clear_inodelk (this, pl_inode, dom, args,
+ &tmp_bcount, &tmp_gcount,
+ op_errno);
+ if (ret)
+ goto out;
+ break;
+ case CLRLK_ENTRY:
+ ret = clrlk_clear_entrylk (this, pl_inode, dom, args,
+ &tmp_bcount, &tmp_gcount,
+ op_errno);
+ if (ret)
+ goto out;
+ break;
+ }
+
+ *blkd += tmp_bcount;
+ *granted += tmp_gcount;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
new file mode 100644
index 000000000..511f3f74a
--- /dev/null
+++ b/xlators/features/locks/src/clear.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __CLEAR_H__
+#define __CLEAR_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "compat-errno.h"
+#include "stack.h"
+#include "call-stub.h"
+#include "locks.h"
+
+typedef enum {
+ CLRLK_INODE,
+ CLRLK_ENTRY,
+ CLRLK_POSIX,
+ CLRLK_TYPE_MAX
+} clrlk_type;
+
+typedef enum {
+ CLRLK_BLOCKED = 1,
+ CLRLK_GRANTED,
+ CLRLK_ALL,
+ CLRLK_KIND_MAX
+} clrlk_kind;
+
+typedef enum {
+ KW_TYPE,
+ KW_KIND,
+ /*add new keywords here*/
+ KW_MAX
+} clrlk_opts;
+
+struct _clrlk_args;
+typedef struct _clrlk_args clrlk_args;
+
+struct _clrlk_args {
+ int type;
+ int kind;
+ char *opts;
+};
+
+int
+clrlk_get__kind (char *kind);
+int
+clrlk_get_type (char *type);
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range);
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args);
+
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno);
+#endif /* __CLEAR_H__ */
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 1f10aa20c..b3309580d 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
@@ -35,83 +25,477 @@
#include "common-utils.h"
#include "locks.h"
+#include "common.h"
static int
-__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom);
+__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock);
static void
-__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom);
+__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock);
+static int
+pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *old_lock);
+
+static pl_dom_list_t *
+__allocate_domain (const char *volume)
+{
+ pl_dom_list_t *dom = NULL;
-#define DOMAIN_HEAD(pl_inode, dom) (dom == GF_LOCK_POSIX \
- ? &pl_inode->ext_list \
- : &pl_inode->int_list)
+ dom = GF_CALLOC (1, sizeof (*dom),
+ gf_locks_mt_pl_dom_list_t);
+ if (!dom)
+ goto out;
-pl_inode_t *
-pl_inode_get (xlator_t *this, inode_t *inode)
+ dom->domain = gf_strdup(volume);
+ if (!dom->domain)
+ goto out;
+
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ "New domain allocated: %s", dom->domain);
+
+ INIT_LIST_HEAD (&dom->inode_list);
+ INIT_LIST_HEAD (&dom->entrylk_list);
+ INIT_LIST_HEAD (&dom->blocked_entrylks);
+ INIT_LIST_HEAD (&dom->inodelk_list);
+ INIT_LIST_HEAD (&dom->blocked_inodelks);
+
+out:
+ if (dom && (NULL == dom->domain)) {
+ GF_FREE (dom);
+ dom = NULL;
+ }
+
+ return dom;
+}
+
+/* Returns domain for the lock. If domain is not present,
+ * allocates a domain and returns it
+ */
+pl_dom_list_t *
+get_domain (pl_inode_t *pl_inode, const char *volume)
{
- uint64_t tmp_pl_inode = 0;
- pl_inode_t *pl_inode = NULL;
- mode_t st_mode = 0;
- int ret = 0;
+ pl_dom_list_t *dom = NULL;
+
+ GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", volume, out);
- ret = inode_ctx_get (inode, this,&tmp_pl_inode);
- if (ret == 0) {
- pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- goto out;
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (strcmp (dom->domain, volume) == 0)
+ goto unlock;
+ }
+
+ dom = __allocate_domain (volume);
+ if (dom)
+ list_add (&dom->inode_list, &pl_inode->dom_list);
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ if (dom) {
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume);
+ } else {
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume);
}
- pl_inode = CALLOC (1, sizeof (*pl_inode));
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+out:
+ return dom;
+}
- st_mode = inode->st_mode;
- if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP))
- pl_inode->mandatory = 1;
+unsigned long
+fd_to_fdnum (fd_t *fd)
+{
+ return ((unsigned long) fd);
+}
+fd_t *
+fd_from_fdnum (posix_lock_t *lock)
+{
+ return ((fd_t *) lock->fd_num);
+}
- pthread_mutex_init (&pl_inode->mutex, NULL);
+int
+__pl_inode_is_empty (pl_inode_t *pl_inode)
+{
+ pl_dom_list_t *dom = NULL;
+ int is_empty = 1;
- INIT_LIST_HEAD (&pl_inode->dir_list);
- INIT_LIST_HEAD (&pl_inode->ext_list);
- INIT_LIST_HEAD (&pl_inode->int_list);
- INIT_LIST_HEAD (&pl_inode->rw_list);
+ if (!list_empty (&pl_inode->ext_list))
+ is_empty = 0;
- ret = inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (!list_empty (&dom->entrylk_list))
+ is_empty = 0;
-out:
- return pl_inode;
+ if (!list_empty (&dom->inodelk_list))
+ is_empty = 0;
+ }
+
+ return is_empty;
+}
+
+void
+pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame)
+{
+ snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu",
+ (unsigned long long) frame->root->pid,
+ lkowner_utoa (&frame->root->lk_owner),
+ frame->root->client,
+ (unsigned long long) frame->root->unique);
+}
+
+
+void
+pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc)
+{
+ inode_t *inode = NULL;
+ char *ipath = NULL;
+ int ret = 0;
+
+ if (fd)
+ inode = fd->inode;
+ if (loc)
+ inode = loc->inode;
+
+ if (!inode) {
+ snprintf (str, size, "<nul>");
+ return;
+ }
+
+ if (loc && loc->path) {
+ ipath = gf_strdup (loc->path);
+ } else {
+ ret = inode_path (inode, NULL, &ipath);
+ if (ret <= 0)
+ ipath = NULL;
+ }
+
+ snprintf (str, size, "gfid=%s, fd=%p, path=%s",
+ uuid_utoa (inode->gfid), fd,
+ ipath ? ipath : "<nul>");
+
+ GF_FREE (ipath);
+}
+
+
+void
+pl_print_lock (char *str, int size, int cmd,
+ struct gf_flock *flock, gf_lkowner_t *owner)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, "
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
+ cmd_str, type_str, (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner));
+}
+
+
+void
+pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ pl_locker, pl_lockee, pl_lock);
+}
+
+
+void
+pl_print_verdict (char *str, int size, int op_ret, int op_errno)
+{
+ char *verdict = NULL;
+
+ if (op_ret == 0) {
+ verdict = "GRANTED";
+ } else {
+ switch (op_errno) {
+ case EAGAIN:
+ verdict = "TRYAGAIN";
+ break;
+ default:
+ verdict = strerror (op_errno);
+ }
+ }
+
+ snprintf (str, size, "%s", verdict);
+}
+
+
+void
+pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain)
+
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
+ char verdict[32];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+
+ pl_print_verdict (verdict, 32, op_ret, op_errno);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ verdict, pl_locker, pl_lockee, pl_lock);
+}
+
+
+void
+pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain)
+
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ pl_locker, pl_lockee, pl_lock);
+}
+
+
+void
+pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ pl_inode_t *pl_inode = NULL;
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (pl_inode && __pl_inode_is_empty (pl_inode))
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, NULL);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[FLUSH] Locker = {%s} Lockee = {%s}",
+ pl_locker, pl_lockee);
+}
+
+void
+pl_trace_release (xlator_t *this, fd_t *fd)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_lockee[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_lockee (pl_lockee, 256, fd, NULL);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[RELEASE] Lockee = {%s}", pl_lockee);
+}
+
+
+void
+pl_update_refkeeper (xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ int is_empty = 0;
+ int need_unref = 0;
+ int need_ref = 0;
+
+ pl_inode = pl_inode_get (this, inode);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ is_empty = __pl_inode_is_empty (pl_inode);
+
+ if (is_empty && pl_inode->refkeeper) {
+ need_unref = 1;
+ pl_inode->refkeeper = NULL;
+ }
+
+ if (!is_empty && !pl_inode->refkeeper) {
+ need_ref = 1;
+ pl_inode->refkeeper = inode;
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ if (need_unref)
+ inode_unref (inode);
+
+ if (need_ref)
+ inode_ref (inode);
+}
+
+
+pl_inode_t *
+pl_inode_get (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp_pl_inode = 0;
+ pl_inode_t *pl_inode = NULL;
+ int ret = 0;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret == 0) {
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ goto unlock;
+ }
+ pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
+ gf_locks_mt_pl_inode_t);
+ if (!pl_inode) {
+ goto unlock;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Allocating new pl inode");
+
+ pthread_mutex_init (&pl_inode->mutex, NULL);
+
+ INIT_LIST_HEAD (&pl_inode->dom_list);
+ INIT_LIST_HEAD (&pl_inode->ext_list);
+ INIT_LIST_HEAD (&pl_inode->rw_list);
+ INIT_LIST_HEAD (&pl_inode->reservelk_list);
+ INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
+ INIT_LIST_HEAD (&pl_inode->blocked_calls);
+
+ __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ return pl_inode;
}
/* Create a new posix_lock_t */
posix_lock_t *
-new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid)
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd)
{
- posix_lock_t *lock = NULL;
+ posix_lock_t *lock = NULL;
- lock = CALLOC (1, sizeof (posix_lock_t));
- if (!lock) {
- return NULL;
- }
+ GF_VALIDATE_OR_GOTO ("posix-locks", flock, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", client, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fd, out);
- lock->fl_start = flock->l_start;
- lock->fl_type = flock->l_type;
+ lock = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!lock) {
+ goto out;
+ }
- if (flock->l_len == 0)
- lock->fl_end = LLONG_MAX;
- else
- lock->fl_end = flock->l_start + flock->l_len - 1;
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
- lock->transport = transport;
- lock->client_pid = client_pid;
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
- INIT_LIST_HEAD (&lock->list);
+ lock->client = client;
+ lock->fd_num = fd_to_fdnum (fd);
+ lock->fd = fd;
+ lock->client_pid = client_pid;
+ lock->owner = *owner;
- return lock;
+ INIT_LIST_HEAD (&lock->list);
+
+out:
+ return lock;
}
@@ -119,7 +503,7 @@ new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid)
void
__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- list_del_init (&lock->list);
+ list_del_init (&lock->list);
}
@@ -127,32 +511,37 @@ __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
void
__destroy_lock (posix_lock_t *lock)
{
- free (lock);
+ GF_FREE (lock);
}
-/* Convert a posix_lock to a struct flock */
+/* Convert a posix_lock to a struct gf_flock */
void
-posix_lock_to_flock (posix_lock_t *lock, struct flock *flock)
+posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock)
{
- flock->l_pid = lock->client_pid;
- flock->l_type = lock->fl_type;
- flock->l_start = lock->fl_start;
-
- if (lock->fl_end == LLONG_MAX)
- flock->l_len = 0;
- else
- flock->l_len = lock->fl_end - lock->fl_start + 1;
+ flock->l_pid = lock->client_pid;
+ flock->l_type = lock->fl_type;
+ flock->l_start = lock->fl_start;
+ flock->l_owner = lock->owner;
+
+ if (lock->fl_end == LLONG_MAX)
+ flock->l_len = 0;
+ else
+ flock->l_len = lock->fl_end - lock->fl_start + 1;
}
-
/* Insert the lock into the inode's lock list */
static void
-__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
+__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- list_add_tail (&lock->list, DOMAIN_HEAD (pl_inode, dom));
+ if (lock->blocked)
+ gettimeofday (&lock->blkd_time, NULL);
+ else
+ gettimeofday (&lock->granted_time, NULL);
- return;
+ list_add_tail (&lock->list, &pl_inode->ext_list);
+
+ return;
}
@@ -160,14 +549,14 @@ __insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
int
locks_overlap (posix_lock_t *l1, posix_lock_t *l2)
{
- /*
- Note:
- FUSE always gives us absolute offsets, so no need to worry
- about SEEK_CUR or SEEK_END
- */
-
- return ((l1->fl_end >= l2->fl_start) &&
- (l2->fl_end >= l1->fl_start));
+ /*
+ Note:
+ FUSE always gives us absolute offsets, so no need to worry
+ about SEEK_CUR or SEEK_END
+ */
+
+ return ((l1->fl_end >= l2->fl_start) &&
+ (l2->fl_end >= l1->fl_start));
}
@@ -175,24 +564,26 @@ locks_overlap (posix_lock_t *l1, posix_lock_t *l2)
int
same_owner (posix_lock_t *l1, posix_lock_t *l2)
{
- return ((l1->client_pid == l2->client_pid) &&
- (l1->transport == l2->transport));
+
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
+
}
/* Delete all F_UNLCK locks */
void
-__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom)
+__delete_unlck_locks (pl_inode_t *pl_inode)
{
- posix_lock_t *l = NULL;
- posix_lock_t *tmp = NULL;
-
- list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) {
- if (l->fl_type == F_UNLCK) {
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
- }
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if (l->fl_type == F_UNLCK) {
+ __delete_lock (pl_inode, l);
+ __destroy_lock (l);
+ }
+ }
}
@@ -200,130 +591,202 @@ __delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom)
static posix_lock_t *
add_locks (posix_lock_t *l1, posix_lock_t *l2)
{
- posix_lock_t *sum = NULL;
+ posix_lock_t *sum = NULL;
- sum = CALLOC (1, sizeof (posix_lock_t));
- if (!sum)
- return NULL;
+ sum = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!sum)
+ return NULL;
- sum->fl_start = min (l1->fl_start, l2->fl_start);
- sum->fl_end = max (l1->fl_end, l2->fl_end);
+ sum->fl_start = min (l1->fl_start, l2->fl_start);
+ sum->fl_end = max (l1->fl_end, l2->fl_end);
- return sum;
+ return sum;
}
/* Subtract two locks */
struct _values {
- posix_lock_t *locks[3];
+ posix_lock_t *locks[3];
};
/* {big} must always be contained inside {small} */
static struct _values
subtract_locks (posix_lock_t *big, posix_lock_t *small)
{
- struct _values v = { .locks = {0, 0, 0} };
-
- if ((big->fl_start == small->fl_start) &&
- (big->fl_end == small->fl_end)) {
- /* both edges coincide with big */
- v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[0]);
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_type = small->fl_type;
- }
- else if ((small->fl_start > big->fl_start) &&
- (small->fl_end < big->fl_end)) {
- /* both edges lie inside big */
- v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[0]);
- v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[1]);
- v.locks[2] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[2]);
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_end = small->fl_start - 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- memcpy (v.locks[2], big, sizeof (posix_lock_t));
- v.locks[2]->fl_start = small->fl_end + 1;
- }
- /* one edge coincides with big */
- else if (small->fl_start == big->fl_start) {
- v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[0]);
- v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[1]);
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_start = small->fl_end + 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- }
- else if (small->fl_end == big->fl_end) {
- v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[0]);
- v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[1]);
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_end = small->fl_start - 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- }
- else {
- gf_log ("posix-locks", GF_LOG_ERROR,
- "Unexpected case in subtract_locks. Please send "
- "a bug report to gluster-devel@nongnu.org");
- }
-
- return v;
-}
-
-/*
- Start searching from {begin}, and return the first lock that
- conflicts, NULL if no conflict
- If {begin} is NULL, then start from the beginning of the list
+
+ struct _values v = { .locks = {0, 0, 0} };
+
+ if ((big->fl_start == small->fl_start) &&
+ (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[0])
+ goto out;
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_type = small->fl_type;
+ goto done;
+ }
+
+ if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* both edges lie inside big */
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[1])
+ goto out;
+
+ v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+
+ memcpy (v.locks[2], big, sizeof (posix_lock_t));
+ v.locks[2]->fl_start = small->fl_end + 1;
+ goto done;
+
+ }
+
+ /* one edge coincides with big */
+ if (small->fl_start == big->fl_start) {
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_start = small->fl_end + 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+ goto done;
+ }
+
+ if (small->fl_end == big->fl_end) {
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+ goto done;
+ }
+
+ GF_ASSERT (0);
+ gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks");
+
+out:
+ if (v.locks[0]) {
+ GF_FREE (v.locks[0]);
+ v.locks[0] = NULL;
+ }
+ if (v.locks[1]) {
+ GF_FREE (v.locks[1]);
+ v.locks[1] = NULL;
+ }
+ if (v.locks[2]) {
+ GF_FREE (v.locks[2]);
+ v.locks[2] = NULL;
+ }
+
+done:
+ return v;
+}
+
+static posix_lock_t *
+first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *conf = NULL;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->blocked)
+ continue;
+
+ if (locks_overlap (l, lock)) {
+ if (same_owner (l, lock))
+ continue;
+
+ if ((l->fl_type == F_WRLCK) ||
+ (lock->fl_type == F_WRLCK)) {
+ conf = l;
+ goto unlock;
+ }
+ }
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ return conf;
+}
+
+/*
+ Start searching from {begin}, and return the first lock that
+ conflicts, NULL if no conflict
+ If {begin} is NULL, then start from the beginning of the list
*/
static posix_lock_t *
-first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom)
+first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *l = NULL;
+ posix_lock_t *l = NULL;
- list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) {
- if (l->blocked)
- continue;
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->blocked)
+ continue;
- if (locks_overlap (l, lock))
- return l;
- }
+ if (locks_overlap (l, lock))
+ return l;
+ }
- return NULL;
+ return NULL;
}
/* Return true if lock is grantable */
static int
-__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom)
+__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *l = NULL;
- int ret = 1;
-
- list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) {
- if (!l->blocked && locks_overlap (lock, l)) {
- if (((l->fl_type == F_WRLCK)
- || (lock->fl_type == F_WRLCK))
- && (lock->fl_type != F_UNLCK)
- && !same_owner (l, lock)) {
- ret = 0;
- break;
- }
- }
- }
- return ret;
+ posix_lock_t *l = NULL;
+ int ret = 1;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (!l->blocked && locks_overlap (lock, l)) {
+ if (((l->fl_type == F_WRLCK)
+ || (lock->fl_type == F_WRLCK))
+ && (lock->fl_type != F_UNLCK)
+ && !same_owner (l, lock)) {
+ ret = 0;
+ break;
+ }
+ }
+ }
+ return ret;
}
@@ -331,231 +794,428 @@ extern void do_blocked_rw (pl_inode_t *);
static void
-__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom)
+__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
- posix_lock_t *t = NULL;
- posix_lock_t *sum = NULL;
- int i = 0;
- struct _values v = { .locks = {0, 0, 0} };
+ posix_lock_t *conf = NULL;
+ posix_lock_t *t = NULL;
+ posix_lock_t *sum = NULL;
+ int i = 0;
+ struct _values v = { .locks = {0, 0, 0} };
+
+ list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) {
+ if (conf->blocked)
+ continue;
+ if (!locks_overlap (conf, lock))
+ continue;
+
+ if (same_owner (conf, lock)) {
+ if (conf->fl_type == lock->fl_type) {
+ sum = add_locks (lock, conf);
+
+ sum->fl_type = lock->fl_type;
+ sum->client = lock->client;
+ sum->fd_num = lock->fd_num;
+ sum->client_pid = lock->client_pid;
+ sum->owner = lock->owner;
+
+ __delete_lock (pl_inode, conf);
+ __destroy_lock (conf);
+
+ __destroy_lock (lock);
+ INIT_LIST_HEAD (&sum->list);
+ posix_lock_to_flock (sum, &sum->user_flock);
+ __insert_and_merge (pl_inode, sum);
+
+ return;
+ } else {
+ sum = add_locks (lock, conf);
+
+ sum->fl_type = conf->fl_type;
+ sum->client = conf->client;
+ sum->fd_num = conf->fd_num;
+ sum->client_pid = conf->client_pid;
+ sum->owner = conf->owner;
+
+ v = subtract_locks (sum, lock);
+
+ __delete_lock (pl_inode, conf);
+ __destroy_lock (conf);
+
+ __delete_lock (pl_inode, lock);
+ __destroy_lock (lock);
+
+ __destroy_lock (sum);
+
+ for (i = 0; i < 3; i++) {
+ if (!v.locks[i])
+ continue;
+
+ INIT_LIST_HEAD (&v.locks[i]->list);
+ posix_lock_to_flock (v.locks[i],
+ &v.locks[i]->user_flock);
+ __insert_and_merge (pl_inode,
+ v.locks[i]);
+ }
+
+ __delete_unlck_locks (pl_inode);
+ return;
+ }
+ }
+
+ if (lock->fl_type == F_UNLCK) {
+ continue;
+ }
+
+ if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
+ __insert_lock (pl_inode, lock);
+ return;
+ }
+ }
- list_for_each_entry_safe (conf, t, DOMAIN_HEAD (pl_inode, dom), list) {
- if (!locks_overlap (conf, lock))
- continue;
+ /* no conflicts, so just insert */
+ if (lock->fl_type != F_UNLCK) {
+ __insert_lock (pl_inode, lock);
+ } else {
+ __destroy_lock (lock);
+ }
+}
- if (same_owner (conf, lock)) {
- if (conf->fl_type == lock->fl_type) {
- sum = add_locks (lock, conf);
- sum->fl_type = lock->fl_type;
- sum->transport = lock->transport;
- sum->client_pid = lock->client_pid;
+void
+__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted)
+{
+ struct list_head tmp_list;
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *conf = NULL;
+
+ INIT_LIST_HEAD (&tmp_list);
+
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if (l->blocked) {
+ conf = first_overlap (pl_inode, l);
+ if (conf)
+ continue;
+
+ l->blocked = 0;
+ list_move_tail (&l->list, &tmp_list);
+ }
+ }
- __delete_lock (pl_inode, conf);
- __destroy_lock (conf);
+ list_for_each_entry_safe (l, tmp, &tmp_list, list) {
+ list_del_init (&l->list);
- __destroy_lock (lock);
- __insert_and_merge (pl_inode, sum, dom);
+ if (__is_lock_grantable (pl_inode, l)) {
+ conf = GF_CALLOC (1, sizeof (*conf),
+ gf_locks_mt_posix_lock_t);
- return;
- } else {
- sum = add_locks (lock, conf);
+ if (!conf) {
+ l->blocked = 1;
+ __insert_lock (pl_inode, l);
+ continue;
+ }
- sum->fl_type = conf->fl_type;
- sum->transport = conf->transport;
- sum->client_pid = conf->client_pid;
+ conf->frame = l->frame;
+ l->frame = NULL;
- v = subtract_locks (sum, lock);
-
- __delete_lock (pl_inode, conf);
- __destroy_lock (conf);
+ posix_lock_to_flock (l, &conf->user_flock);
- __delete_lock (pl_inode, lock);
- __destroy_lock (lock);
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ l->client_pid, lkowner_utoa (&l->owner),
+ l->user_flock.l_start,
+ l->user_flock.l_len);
- __destroy_lock (sum);
+ __insert_and_merge (pl_inode, l);
- for (i = 0; i < 3; i++) {
- if (!v.locks[i])
- continue;
+ list_add (&conf->list, granted);
+ } else {
+ l->blocked = 1;
+ __insert_lock (pl_inode, l);
+ }
+ }
+}
- if (v.locks[i]->fl_type == F_UNLCK) {
- __destroy_lock (v.locks[i]);
- continue;
- }
- __insert_and_merge (pl_inode,
- v.locks[i], dom);
- }
- __delete_unlck_locks (pl_inode, dom);
- return;
- }
- }
+void
+grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode)
+{
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
- if (lock->fl_type == F_UNLCK) {
- continue;
- }
+ INIT_LIST_HEAD (&granted_list);
- if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
- __insert_lock (pl_inode, lock, dom);
- return;
- }
- }
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_locks (this, pl_inode, &granted_list);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- /* no conflicts, so just insert */
- if (lock->fl_type != F_UNLCK) {
- __insert_lock (pl_inode, lock, dom);
- } else {
- __destroy_lock (lock);
- }
-}
+ list_for_each_entry_safe (lock, tmp, &granted_list, list) {
+ list_del_init (&lock->list);
+ pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, 0, 0, NULL);
-void
-__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode,
- gf_lk_domain_t dom, struct list_head *granted)
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
+
+ GF_FREE (lock);
+ }
+
+ return;
+}
+
+static int
+pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *old_lock)
{
- struct list_head tmp_list;
- posix_lock_t *l = NULL;
- posix_lock_t *tmp = NULL;
- posix_lock_t *conf = NULL;
+ struct gf_flock flock = {0,};
+ posix_lock_t *unlock_lock = NULL;
- INIT_LIST_HEAD (&tmp_list);
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
- list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) {
- if (l->blocked) {
- conf = first_overlap (pl_inode, l, dom);
- if (conf)
- continue;
+ int ret = -1;
- l->blocked = 0;
- list_move_tail (&l->list, &tmp_list);
- }
- }
+ INIT_LIST_HEAD (&granted_list);
- list_for_each_entry_safe (l, tmp, &tmp_list, list) {
- list_del_init (&l->list);
+ flock.l_type = F_UNLCK;
+ flock.l_whence = old_lock->user_flock.l_whence;
+ flock.l_start = old_lock->user_flock.l_start;
+ flock.l_len = old_lock->user_flock.l_len;
- if (__is_lock_grantable (pl_inode, l, dom)) {
- conf = CALLOC (1, sizeof (*conf));
- if (!conf) {
- l->blocked = 1;
- __insert_lock (pl_inode, l, dom);
- continue;
- }
+ unlock_lock = new_posix_lock (&flock, old_lock->client,
+ old_lock->client_pid, &old_lock->owner,
+ old_lock->fd);
+ GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out);
+ ret = 0;
- conf->frame = l->frame;
- l->frame = NULL;
+ __insert_and_merge (pl_inode, unlock_lock);
- posix_lock_to_flock (l, &conf->user_flock);
+ __grant_blocked_locks (this, pl_inode, &granted_list);
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) %"PRId64" - %"PRId64" => Granted",
- l->fl_type == F_UNLCK ? "Unlock" : "Lock",
- l->client_pid,
- l->user_flock.l_start,
- l->user_flock.l_len);
+ list_for_each_entry_safe (lock, tmp, &granted_list, list) {
+ list_del_init (&lock->list);
- __insert_and_merge (pl_inode, l, dom);
+ pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, 0, 0, NULL);
- list_add (&conf->list, granted);
- } else {
- l->blocked = 1;
- __insert_lock (pl_inode, l, dom);
- }
- }
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
+
+ GF_FREE (lock);
+ }
+
+out:
+ return ret;
}
+int
+pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ int ret = 0;
+
+ errno = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ /* Send unlock before the actual lock to
+ prevent lock upgrade / downgrade
+ problems only if:
+ - it is a blocking call
+ - it has other conflicting locks
+ */
+
+ if (can_block &&
+ !(__is_lock_grantable (pl_inode, lock))) {
+ ret = pl_send_prelock_unlock (this, pl_inode,
+ lock);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not send pre-lock "
+ "unlock");
+ }
+
+ if (__is_lock_grantable (pl_inode, lock)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ __insert_and_merge (pl_inode, lock);
+ } else if (can_block) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ lock->blocked = 1;
+ __insert_lock (pl_inode, lock);
+ ret = -1;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ errno = EAGAIN;
+ ret = -1;
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ grant_blocked_locks (this, pl_inode);
-void
-grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom)
+ do_blocked_rw (pl_inode);
+
+ return ret;
+}
+
+
+posix_lock_t *
+pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- struct list_head granted_list;
- posix_lock_t *tmp = NULL;
- posix_lock_t *lock = NULL;
+ posix_lock_t *conf = NULL;
- INIT_LIST_HEAD (&granted_list);
+ conf = first_conflicting_overlap (pl_inode, lock);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_locks (this, pl_inode, dom, &granted_list);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ if (conf == NULL) {
+ lock->fl_type = F_UNLCK;
+ return lock;
+ }
- list_for_each_entry_safe (lock, tmp, &granted_list, list) {
- list_del_init (&lock->list);
+ return conf;
+}
- STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock);
- FREE (lock);
- }
+struct _lock_table *
+pl_lock_table_new (void)
+{
+ struct _lock_table *new = NULL;
- return;
+ new = GF_CALLOC (1, sizeof (struct _lock_table), gf_common_mt_lock_table);
+ if (new == NULL) {
+ goto out;
+ }
+ INIT_LIST_HEAD (&new->entrylk_lockers);
+ INIT_LIST_HEAD (&new->inodelk_lockers);
+ LOCK_INIT (&new->lock);
+out:
+ return new;
}
int
-pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block, gf_lk_domain_t dom)
-{
- int ret = 0;
-
- errno = 0;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- if (__is_lock_grantable (pl_inode, lock, dom)) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) %"PRId64" - %"PRId64" => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- __insert_and_merge (pl_inode, lock, dom);
- } else if (can_block) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- lock->blocked = 1;
- __insert_lock (pl_inode, lock, dom);
- ret = -1;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) %"PRId64" - %"PRId64" => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- errno = EAGAIN;
- ret = -1;
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- grant_blocked_locks (this, pl_inode, dom);
-
- do_blocked_rw (pl_inode);
-
- return ret;
-}
+pl_add_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc, fd_t *fd, pid_t pid, gf_lkowner_t *owner,
+ glusterfs_fop_t type)
+{
+ int32_t ret = -1;
+ struct _locker *new = NULL;
+ GF_VALIDATE_OR_GOTO ("lock-table", table, out);
+ GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
-posix_lock_t *
-pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
+ new = GF_CALLOC (1, sizeof (struct _locker), gf_common_mt_locker);
+ if (new == NULL) {
+ goto out;
+ }
+ INIT_LIST_HEAD (&new->lockers);
+
+ new->volume = gf_strdup (volume);
+
+ if (fd == NULL) {
+ loc_copy (&new->loc, loc);
+ } else {
+ new->fd = fd_ref (fd);
+ }
+
+ new->pid = pid;
+ new->owner = *owner;
+
+ LOCK (&table->lock);
+ {
+ if (type == GF_FOP_ENTRYLK)
+ list_add_tail (&new->lockers, &table->entrylk_lockers);
+ else
+ list_add_tail (&new->lockers, &table->inodelk_lockers);
+ }
+ UNLOCK (&table->lock);
+out:
+ return ret;
+}
+
+int
+pl_del_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc, fd_t *fd, gf_lkowner_t *owner, glusterfs_fop_t type)
{
- posix_lock_t *conf = NULL;
+ struct _locker *locker = NULL;
+ struct _locker *tmp = NULL;
+ int32_t ret = -1;
+ struct list_head *head = NULL;
+ struct list_head del;
+
+ GF_VALIDATE_OR_GOTO ("lock-table", table, out);
+ GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
+
+ INIT_LIST_HEAD (&del);
+
+ LOCK (&table->lock);
+ {
+ if (type == GF_FOP_ENTRYLK) {
+ head = &table->entrylk_lockers;
+ } else {
+ head = &table->inodelk_lockers;
+ }
+
+ list_for_each_entry_safe (locker, tmp, head, lockers) {
+ if (!is_same_lkowner (&locker->owner, owner) ||
+ strcmp (locker->volume, volume))
+ continue;
+
+ /*
+ * It is possible for inodelk lock to come on anon-fd
+ * and inodelk unlock to come on normal fd in case of
+ * client re-opens. So don't check for fds to be equal.
+ */
+ if (locker->fd && fd)
+ list_move_tail (&locker->lockers, &del);
+ else if (locker->loc.inode && loc &&
+ (locker->loc.inode == loc->inode))
+ list_move_tail (&locker->lockers, &del);
+ }
+ }
+ UNLOCK (&table->lock);
- conf = first_overlap (pl_inode, lock, dom);
+ tmp = NULL;
+ locker = NULL;
- if (conf == NULL) {
- lock->fl_type = F_UNLCK;
- return lock;
- }
+ list_for_each_entry_safe (locker, tmp, &del, lockers) {
+ list_del_init (&locker->lockers);
+ if (locker->fd)
+ fd_unref (locker->fd);
+ else
+ loc_wipe (&locker->loc);
+
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
+ }
+
+ ret = 0;
+out:
+ return ret;
- return conf;
}
+
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index ee17b0087..db19ec978 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -1,43 +1,71 @@
/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __COMMON_H__
#define __COMMON_H__
+#include "lkowner.h"
+/*dump locks format strings */
+#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
+#define ENTRY_FMT "type=%s on basename=%s"
+#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p"
+#define GRNTD_AT "granted at %s"
+#define BLKD_AT "blocked at %s"
+#define CONN_ID "connection-id=%s"
+#define DUMP_BLKD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT
+#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "GRNTD_AT
+#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT", "GRNTD_AT
+
+#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT
+#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT
+#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT
+
+#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT
+#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT
+#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT
+
+#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
+
+struct _locker {
+ struct list_head lockers;
+ char *volume;
+ loc_t loc;
+ fd_t *fd;
+ gf_lkowner_t owner;
+ pid_t pid;
+};
+
+struct _lock_table {
+ struct list_head inodelk_lockers;
+ struct list_head entrylk_lockers;
+ gf_lock_t lock;
+};
+
posix_lock_t *
-new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid);
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd);
pl_inode_t *
pl_inode_get (xlator_t *this, inode_t *inode);
posix_lock_t *
-pl_getlk (pl_inode_t *inode, posix_lock_t *lock, gf_lk_domain_t domain);
+pl_getlk (pl_inode_t *inode, posix_lock_t *lock);
int
pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock,
- int can_block, gf_lk_domain_t domain);
+ int can_block);
void
-grant_blocked_locks (xlator_t *this, pl_inode_t *inode, gf_lk_domain_t domain);
+grant_blocked_locks (xlator_t *this, pl_inode_t *inode);
void
-posix_lock_to_flock (posix_lock_t *lock, struct flock *flock);
+posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock);
int
locks_overlap (posix_lock_t *l1, posix_lock_t *l2);
@@ -49,4 +77,111 @@ void __delete_lock (pl_inode_t *, posix_lock_t *);
void __destroy_lock (posix_lock_t *);
+pl_dom_list_t *
+get_domain (pl_inode_t *pl_inode, const char *volume);
+
+void
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom);
+
+void
+__delete_inode_lock (pl_inode_lock_t *lock);
+
+void
+__pl_inodelk_unref (pl_inode_lock_t *lock);
+
+void
+grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_entry_lock_t *unlocked, pl_dom_list_t *dom);
+
+void pl_update_refkeeper (xlator_t *this, inode_t *inode);
+
+int32_t
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname);
+int32_t
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname);
+
+int32_t
+__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode);
+int32_t
+get_entrylk_count (xlator_t *this, inode_t *inode);
+
+void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain);
+
+void pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain);
+
+void pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain);
+
+void pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd);
+
+void entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+void entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type,
+ int op_ret, int op_errno);
+
+void entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+void
+pl_print_verdict (char *str, int size, int op_ret, int op_errno);
+
+void
+pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc);
+
+void
+pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame);
+
+void
+pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain);
+
+void
+pl_trace_release (xlator_t *this, fd_t *fd);
+
+unsigned long
+fd_to_fdnum (fd_t *fd);
+
+fd_t *
+fd_from_fdnum (posix_lock_t *lock);
+
+int
+pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block);
+int
+reservelks_equal (posix_lock_t *l1, posix_lock_t *l2);
+
+int
+pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock, int can_block);
+int
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+uint32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename);
+
+int32_t
+pl_add_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc,
+ fd_t *fd,
+ pid_t pid,
+ gf_lkowner_t *owner,
+ glusterfs_fop_t type);
+
+int32_t
+pl_del_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc,
+ fd_t *fd,
+ gf_lkowner_t *owner,
+ glusterfs_fop_t type);
+
+struct _lock_table *
+pl_lock_table_new (void);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
new file mode 100644
index 000000000..0785dc547
--- /dev/null
+++ b/xlators/features/locks/src/entrylk.c
@@ -0,0 +1,848 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+static pl_entry_lock_t *
+new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
+ client_t *client, pid_t client_pid, gf_lkowner_t *owner,
+ const char *volume)
+
+{
+ pl_entry_lock_t *newlock = NULL;
+
+ newlock = GF_CALLOC (1, sizeof (pl_entry_lock_t),
+ gf_locks_mt_pl_entry_lock_t);
+ if (!newlock) {
+ goto out;
+ }
+
+ newlock->basename = basename ? gf_strdup (basename) : NULL;
+ newlock->type = type;
+ newlock->trans = client;
+ newlock->volume = volume;
+ newlock->client_pid = client_pid;
+ newlock->owner = *owner;
+
+ INIT_LIST_HEAD (&newlock->domain_list);
+ INIT_LIST_HEAD (&newlock->blocked_locks);
+
+out:
+ return newlock;
+}
+
+
+/**
+ * all_names - does a basename represent all names?
+ * @basename: name to check
+ */
+
+#define all_names(basename) ((basename == NULL) ? 1 : 0)
+
+/**
+ * names_conflict - do two names conflict?
+ * @n1: name
+ * @n2: name
+ */
+
+static int
+names_conflict (const char *n1, const char *n2)
+{
+ return all_names (n1) || all_names (n2) || !strcmp (n1, n2);
+}
+
+
+static inline int
+__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
+{
+
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->trans == l2->trans));
+}
+
+
+/**
+ * lock_grantable - is this lock grantable?
+ * @inode: inode in which to look
+ * @basename: name we're trying to lock
+ * @type: type of lock
+ */
+static pl_entry_lock_t *
+__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+{
+ pl_entry_lock_t *lock = NULL;
+
+ if (list_empty (&dom->entrylk_list))
+ return NULL;
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ if (names_conflict (lock->basename, basename))
+ return lock;
+ }
+
+ return NULL;
+}
+
+static pl_entry_lock_t *
+__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+{
+ pl_entry_lock_t *lock = NULL;
+
+ if (list_empty (&dom->blocked_entrylks))
+ return NULL;
+
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ if (names_conflict (lock->basename, basename))
+ return lock;
+ }
+
+ return NULL;
+}
+
+static int
+__owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock)
+{
+ pl_entry_lock_t *lock = NULL;
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ if (__same_entrylk_owner (lock, newlock))
+ return 1;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ if (__same_entrylk_owner (lock, newlock))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+names_equal (const char *n1, const char *n2)
+{
+ return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2));
+}
+
+void
+pl_print_entrylk (char *str, int size, entrylk_cmd cmd, entrylk_type type,
+ const char *basename, const char *domain)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+
+ switch (cmd) {
+ case ENTRYLK_LOCK:
+ cmd_str = "LOCK";
+ break;
+
+ case ENTRYLK_LOCK_NB:
+ cmd_str = "LOCK_NB";
+ break;
+
+ case ENTRYLK_UNLOCK:
+ cmd_str = "UNLOCK";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (type) {
+ case ENTRYLK_RDLCK:
+ type_str = "READ";
+ break;
+ case ENTRYLK_WRLCK:
+ type_str = "WRITE";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (str, size, "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s",
+ cmd_str, type_str, basename, domain);
+}
+
+
+void
+entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *domain,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ pl_locker, pl_lockee, pl_entrylk);
+}
+
+
+void
+entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *domain,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, int op_ret, int op_errno)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
+ char verdict[32];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain);
+ pl_print_verdict (verdict, 32, op_ret, op_errno);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ verdict, pl_locker, pl_lockee, pl_entrylk);
+}
+
+
+void
+entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, volume);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ pl_locker, pl_lockee, pl_entrylk);
+}
+
+/**
+ * __find_most_matching_lock - find the lock struct which most matches in order of:
+ * lock on the exact basename ||
+ * an all_names lock
+ *
+ *
+ * @inode: inode in which to look
+ * @basename: name to search for
+ */
+
+static pl_entry_lock_t *
+__find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
+{
+ pl_entry_lock_t *lock;
+ pl_entry_lock_t *all = NULL;
+ pl_entry_lock_t *exact = NULL;
+
+ if (list_empty (&dom->entrylk_list))
+ return NULL;
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ if (all_names (lock->basename))
+ all = lock;
+ else if (names_equal (lock->basename, basename))
+ exact = lock;
+ }
+
+ return (exact ? exact : all);
+}
+
+/**
+ * __lock_name - lock a name in a directory
+ * @inode: inode for the directory in which to lock
+ * @basename: name of the entry to lock
+ * if null, lock the entire directory
+ *
+ * the entire directory being locked is represented as: a single
+ * pl_entry_lock_t present in the entrylk_locks list with its
+ * basename = NULL
+ */
+
+int
+__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
+ call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this,
+ int nonblock, char *conn_id)
+{
+ pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *conf = NULL;
+ int ret = -EINVAL;
+
+ lock = new_entrylk_lock (pinode, basename, type,
+ frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, dom->domain);
+ if (!lock) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ lock->frame = frame;
+ lock->this = this;
+ lock->trans = frame->root->client;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup (conn_id);
+ }
+
+ conf = __lock_grantable (dom, basename, type);
+ if (conf) {
+ ret = -EAGAIN;
+ if (nonblock){
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock);
+ goto out;
+
+ }
+
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, basename);
+
+ goto out;
+ }
+
+ if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) {
+ ret = -EAGAIN;
+ if (nonblock) {
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *) lock->basename);
+ GF_FREE (lock);
+ goto out;
+
+ }
+ lock->frame = frame;
+ lock->this = this;
+
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock is grantable, but blocking to prevent starvation");
+ gf_log (this->name, GF_LOG_TRACE,
+ "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, basename);
+
+ ret = -EAGAIN;
+ goto out;
+ }
+ switch (type) {
+
+ case ENTRYLK_WRLCK:
+ gettimeofday (&lock->granted_time, NULL);
+ list_add_tail (&lock->domain_list, &dom->entrylk_list);
+ break;
+
+ default:
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Invalid type for entrylk specified: %d", type);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/**
+ * __unlock_name - unlock a name in a directory
+ * @inode: inode for the directory to unlock in
+ * @basename: name of the entry to unlock
+ * if null, unlock the entire directory
+ */
+
+pl_entry_lock_t *
+__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+{
+ pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *ret_lock = NULL;
+
+ lock = __find_most_matching_lock (dom, basename);
+
+ if (!lock) {
+ gf_log ("locks", GF_LOG_DEBUG,
+ "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found",
+ basename);
+ goto out;
+ }
+
+ if (names_equal (lock->basename, basename)
+ && lock->type == type) {
+
+ if (type == ENTRYLK_WRLCK) {
+ list_del_init (&lock->domain_list);
+ ret_lock = lock;
+ }
+ } else {
+ gf_log ("locks", GF_LOG_DEBUG,
+ "Unlock for a non-existing lock!");
+ goto out;
+ }
+
+out:
+ return ret_lock;
+}
+
+uint32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename)
+{
+ uint32_t entrylk = 0;
+ pl_inode_t *pinode = 0;
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *conf = NULL;
+
+ pinode = pl_inode_get (this, parent);
+ if (!pinode)
+ goto out;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ list_for_each_entry (dom, &pinode->dom_list, inode_list) {
+ conf = __lock_grantable (dom, basename, ENTRYLK_WRLCK);
+ if (conf && conf->basename) {
+ entrylk = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ return entrylk;
+}
+
+void
+__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct list_head *granted)
+{
+ int bl_ret = 0;
+ pl_entry_lock_t *bl = NULL;
+ pl_entry_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&dom->blocked_entrylks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list,
+ blocked_locks) {
+
+ list_del_init (&bl->blocked_locks);
+
+
+ gf_log ("locks", GF_LOG_TRACE,
+ "Trying to unblock: {pinode=%p, basename=%s}",
+ pl_inode, bl->basename);
+
+ bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
+ bl->frame, dom, bl->this, 0,
+ bl->connection_id);
+
+ if (bl_ret == 0) {
+ list_add (&bl->blocked_locks, granted);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "should never happen");
+ GF_FREE (bl->connection_id);
+ GF_FREE ((char *)bl->basename);
+ GF_FREE (bl);
+ }
+ }
+ return;
+}
+
+/* Grants locks if possible which are blocked on a lock */
+void
+grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_entry_lock_t *unlocked, pl_dom_list_t *dom)
+{
+ struct list_head granted_list;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lock = NULL;
+
+ INIT_LIST_HEAD (&granted_list);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_entry_locks (this, pl_inode, dom,
+ &granted_list);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+
+ entrylk_trace_out (this, lock->frame, NULL, NULL, NULL,
+ lock->basename, ENTRYLK_LOCK, lock->type,
+ 0, 0);
+
+ STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
+
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock);
+ }
+
+ GF_FREE ((char *)unlocked->basename);
+ GF_FREE (unlocked->connection_id);
+ GF_FREE (unlocked);
+
+ return;
+}
+
+/**
+ * release_entry_locks_for_client: release all entry locks from this
+ * client for this loc_t
+ */
+
+static int
+release_entry_locks_for_client (xlator_t *this, pl_inode_t *pinode,
+ pl_dom_list_t *dom, client_t *client)
+{
+ pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ struct list_head granted;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&granted);
+ INIT_LIST_HEAD (&released);
+
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ if (lock->trans != client)
+ continue;
+
+ list_del_init (&lock->blocked_locks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "releasing lock on held by "
+ "{client=%p}", client);
+
+ list_add (&lock->blocked_locks, &released);
+
+ }
+
+ list_for_each_entry_safe (lock, tmp, &dom->entrylk_list,
+ domain_list) {
+ if (lock->trans != client)
+ continue;
+
+ list_del_init (&lock->domain_list);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "releasing lock on held by "
+ "{client=%p}", client);
+
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
+
+ __grant_blocked_entry_locks (this, pinode, dom, &granted);
+
+ }
+
+ pthread_mutex_unlock (&pinode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &released, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+
+ STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN, NULL);
+
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+
+ }
+
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+
+ STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
+
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
+
+ return 0;
+}
+
+/* Common entrylk code called by pl_entrylk and pl_fentrylk */
+int
+pl_common_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, inode_t *inode, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ char unwind = 1;
+ GF_UNUSED int dict_ret = -1;
+ pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *unlocked = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
+
+ pinode = pl_inode_get (this, inode);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ dom = get_domain (pinode, volume);
+ if (!dom){
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type);
+
+ if (frame->root->lk_owner.len == 0) {
+ /*
+ this is a special case that means release
+ all locks from this client
+ */
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Releasing locks for client %p", frame->root->client);
+
+ release_entry_locks_for_client (this, pinode, dom,
+ frame->root->client);
+ op_ret = 0;
+
+ goto out;
+ }
+
+ switch (cmd) {
+ case ENTRYLK_LOCK:
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ ret = __lock_name (pinode, basename, type,
+ frame, dom, this, 0, conn_id);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ op_errno = -ret;
+ if (ret < 0) {
+ if (ret == -EAGAIN)
+ unwind = 0;
+ else
+ unwind = 1;
+ goto out;
+ } else {
+ op_ret = 0;
+ op_errno = 0;
+ unwind = 1;
+ goto out;
+ }
+
+ break;
+
+ case ENTRYLK_LOCK_NB:
+ unwind = 1;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ ret = __lock_name (pinode, basename, type,
+ frame, dom, this, 1, conn_id);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ break;
+
+ case ENTRYLK_UNLOCK:
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ unlocked = __unlock_name (dom, basename, type);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ if (unlocked)
+ grant_blocked_entry_locks (this, pinode, unlocked, dom);
+
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unexpected case in entrylk (cmd=%d). Please file"
+ "a bug report at http://bugs.gluster.com", cmd);
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ pl_update_refkeeper (this, inode);
+ if (unwind) {
+ entrylk_trace_out (this, frame, volume, fd, loc, basename,
+ cmd, type, op_ret, op_errno);
+
+ ctx = pl_ctx_get (frame->root->client, this);
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+
+ if (cmd == ENTRYLK_UNLOCK)
+ pl_del_locker (ctx->ltable, volume, loc, fd,
+ &frame->root->lk_owner,
+ GF_FOP_ENTRYLK);
+ else
+ pl_add_locker (ctx->ltable, volume, loc, fd,
+ frame->root->pid,
+ &frame->root->lk_owner,
+ GF_FOP_ENTRYLK);
+
+unwind:
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL);
+ } else {
+ entrylk_trace_block (this, frame, volume, fd, loc, basename,
+ cmd, type);
+ }
+
+
+ return 0;
+}
+
+/**
+ * pl_entrylk:
+ *
+ * Locking on names (directory entries)
+ */
+
+int
+pl_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd,
+ type, loc, NULL, xdata);
+
+ return 0;
+}
+
+
+/**
+ * pl_fentrylk:
+ *
+ * Locking on names (directory entries)
+ */
+
+int
+pl_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd,
+ type, NULL, fd, xdata);
+
+ return 0;
+}
+
+
+int32_t
+__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
+{
+ int32_t count = 0;
+ pl_entry_lock_t *lock = NULL;
+ pl_dom_list_t *dom = NULL;
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ count++;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ count++;
+ }
+
+ }
+
+ return count;
+}
+
+int32_t
+get_entrylk_count (xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
+
+ ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ count = __get_entrylk_count (this, pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+out:
+ return count;
+}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
new file mode 100644
index 000000000..508523e11
--- /dev/null
+++ b/xlators/features/locks/src/inodelk.c
@@ -0,0 +1,825 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+inline void
+__delete_inode_lock (pl_inode_lock_t *lock)
+{
+ list_del (&lock->list);
+}
+
+static inline void
+__pl_inodelk_ref (pl_inode_lock_t *lock)
+{
+ lock->ref++;
+}
+
+inline void
+__pl_inodelk_unref (pl_inode_lock_t *lock)
+{
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
+}
+
+/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */
+static inline int
+inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
+ return 1;
+
+ return 0;
+}
+
+void
+pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
+ "domain: %s, start=%llu, len=%llu, pid=%llu",
+ cmd_str, type_str, domain,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+}
+
+/* Determine if the two inodelks overlap reach other's lock regions */
+static int
+inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ return ((l1->fl_end >= l2->fl_start) &&
+ (l2->fl_end >= l1->fl_start));
+}
+
+/* Returns true if the 2 inodelks have the same owner */
+static inline int
+same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
+}
+
+/* Returns true if the 2 inodelks conflict with each other */
+static int
+inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ return (inodelk_overlap (l1, l2) &&
+ inodelk_type_conflict (l1, l2));
+}
+
+/* Determine if lock is grantable or not */
+static pl_inode_lock_t *
+__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+{
+ pl_inode_lock_t *l = NULL;
+ pl_inode_lock_t *ret = NULL;
+ if (list_empty (&dom->inodelk_list))
+ goto out;
+ list_for_each_entry (l, &dom->inodelk_list, list){
+ if (inodelk_conflict (lock, l) &&
+ !same_inodelk_owner (lock, l)) {
+ ret = l;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+static pl_inode_lock_t *
+__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+{
+ pl_inode_lock_t *l = NULL;
+ pl_inode_lock_t *ret = NULL;
+
+ if (list_empty (&dom->blocked_inodelks))
+ return NULL;
+
+ list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
+ if (inodelk_conflict (lock, l)) {
+ ret = l;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int
+__owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
+{
+ pl_inode_lock_t *lock = NULL;
+
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+ if (same_inodelk_owner (lock, newlock))
+ return 1;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ if (same_inodelk_owner (lock, newlock))
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/* Determines if lock can be granted and adds the lock. If the lock
+ * is blocking, adds it to the blocked_inodelks list of the domain.
+ */
+static int
+__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
+ int can_block, pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *conf = NULL;
+ int ret = -EINVAL;
+
+ conf = __inodelk_grantable (dom, lock);
+ if (conf){
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
+
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+
+ goto out;
+ }
+
+ if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
+
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock is grantable, but blocking to prevent starvation");
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+
+ goto out;
+ }
+ __pl_inodelk_ref (lock);
+ gettimeofday (&lock->granted_time, NULL);
+ list_add (&lock->list, &dom->inodelk_list);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* Return true if the two inodelks have exactly same lock boundaries */
+static int
+inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ if ((l1->fl_start == l2->fl_start) &&
+ (l1->fl_end == l2->fl_end))
+ return 1;
+
+ return 0;
+}
+
+
+static pl_inode_lock_t *
+find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *l = NULL;
+ list_for_each_entry (l, &dom->inodelk_list, list) {
+ if (inodelks_equal (l, lock) &&
+ same_inodelk_owner (l, lock))
+ return l;
+ }
+ return NULL;
+}
+
+/* Set F_UNLCK removes a lock which has the exact same lock boundaries
+ * as the UNLCK lock specifies. If such a lock is not found, returns invalid
+ */
+static pl_inode_lock_t *
+__inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
+{
+
+ pl_inode_lock_t *conf = NULL;
+
+ conf = find_matching_inodelk (lock, dom);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ " Matching lock not found for unlock %llu-%llu, by %s "
+ "on %p", (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end,
+ lkowner_utoa (&lock->owner), lock->client);
+ goto out;
+ }
+ __delete_inode_lock (conf);
+ gf_log (this->name, GF_LOG_DEBUG,
+ " Matching lock found for unlock %llu-%llu, by %s on %p",
+ (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner),
+ lock->client);
+
+out:
+ return conf;
+}
+static void
+__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom)
+{
+ int bl_ret = 0;
+ pl_inode_lock_t *bl = NULL;
+ pl_inode_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&dom->blocked_inodelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
+
+ list_del_init (&bl->blocked_locks);
+
+ bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
+
+ if (bl_ret == 0) {
+ list_add (&bl->blocked_locks, granted);
+ }
+ }
+ return;
+}
+
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom)
+{
+ struct list_head granted;
+ pl_inode_lock_t *lock;
+ pl_inode_lock_t *tmp;
+
+ INIT_LIST_HEAD (&granted);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, 0, 0, lock->volume);
+
+ STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+ __pl_inodelk_unref (lock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+}
+
+/* Release all inodelks from this client */
+static int
+release_inode_locks_of_client (xlator_t *this, pl_dom_list_t *dom,
+ inode_t *inode, client_t *client)
+{
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *l = NULL;
+
+ pl_inode_t * pinode = NULL;
+
+ struct list_head released;
+
+ char *path = NULL;
+ char *file = NULL;
+
+ INIT_LIST_HEAD (&released);
+
+ pinode = pl_inode_get (this, inode);
+
+ pthread_mutex_lock (&pinode->mutex);
+ {
+
+ list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) {
+ if (l->client != client)
+ continue;
+
+ list_del_init (&l->blocked_locks);
+
+ inode_path (inode, NULL, &path);
+ if (path)
+ file = path;
+ else
+ file = uuid_utoa (inode->gfid);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "releasing blocking lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ file, client, (uint64_t) l->client_pid,
+ lkowner_utoa (&l->owner));
+
+ list_add (&l->blocked_locks, &released);
+ if (path) {
+ GF_FREE (path);
+ path = NULL;
+ }
+ }
+
+ list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) {
+ if (l->client != client)
+ continue;
+
+ inode_path (inode, NULL, &path);
+ if (path)
+ file = path;
+ else
+ file = uuid_utoa (inode->gfid);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "releasing granted lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ file, client, (uint64_t) l->client_pid,
+ lkowner_utoa (&l->owner));
+
+ if (path) {
+ GF_FREE (path);
+ path = NULL;
+ }
+
+ __delete_inode_lock (l);
+ __pl_inodelk_unref (l);
+ }
+ }
+ GF_FREE (path);
+
+ pthread_mutex_unlock (&pinode->mutex);
+
+ list_for_each_entry_safe (l, tmp, &released, blocked_locks) {
+ list_del_init (&l->blocked_locks);
+
+ STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (l);
+ }
+
+ grant_blocked_inode_locks (this, pinode, dom);
+ return 0;
+}
+
+
+static int
+pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
+ int can_block, pl_dom_list_t *dom)
+{
+ int ret = -EINVAL;
+ pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (lock->fl_type != F_UNLCK) {
+ ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->fl_start,
+ lock->fl_end);
+ } else if (ret == -EAGAIN) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ if (can_block)
+ unref = _gf_false;
+ }
+ } else {
+ retlock = __inode_unlock_lock (this, lock, dom);
+ if (!retlock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+ __pl_inodelk_unref (retlock);
+
+ ret = 0;
+ }
+ }
+out:
+ if (unref)
+ __pl_inodelk_unref (lock);
+ pthread_mutex_unlock (&pl_inode->mutex);
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ return ret;
+}
+
+/* Create a new inode_lock_t */
+pl_inode_lock_t *
+new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ call_frame_t *frame, xlator_t *this, const char *volume,
+ char *conn_id)
+
+{
+ pl_inode_lock_t *lock = NULL;
+
+ lock = GF_CALLOC (1, sizeof (*lock),
+ gf_locks_mt_pl_inode_lock_t);
+ if (!lock) {
+ return NULL;
+ }
+
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
+
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
+
+ lock->client = client;
+ lock->client_pid = client_pid;
+ lock->volume = volume;
+ lock->owner = frame->root->lk_owner;
+ lock->frame = frame;
+ lock->this = this;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup (conn_id);
+ }
+
+ INIT_LIST_HEAD (&lock->list);
+ INIT_LIST_HEAD (&lock->blocked_locks);
+ __pl_inodelk_ref (lock);
+
+ return lock;
+}
+
+int32_t
+_pl_convert_volume (const char *volume, char **res)
+{
+ char *mdata_vol = NULL;
+ int ret = 0;
+
+ mdata_vol = strrchr (volume, ':');
+ //if the volume already ends with :metadata don't bother
+ if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0))
+ return 0;
+
+ ret = gf_asprintf (res, "%s:metadata", volume);
+ if (ret <= 0)
+ return ENOMEM;
+ return 0;
+}
+
+int32_t
+_pl_convert_volume_for_special_range (struct gf_flock *flock,
+ const char *volume, char **res)
+{
+ int32_t ret = 0;
+
+ if ((flock->l_start == LLONG_MAX -1) &&
+ (flock->l_len == 0)) {
+ ret = _pl_convert_volume (volume, res);
+ }
+
+ return ret;
+}
+
+/* Common inodelk code called from pl_inodelk and pl_finodelk */
+int
+pl_common_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, inode_t *inode, int32_t cmd,
+ struct gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ GF_UNUSED int dict_ret = -1;
+ int can_block = 0;
+ pl_inode_t * pinode = NULL;
+ pl_inode_lock_t * reqlock = NULL;
+ pl_dom_list_t * dom = NULL;
+ char *res = NULL;
+ char *res1 = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (inode, unwind);
+ VALIDATE_OR_GOTO (flock, unwind);
+
+ if ((flock->l_start < 0) || (flock->l_len < 0)) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ op_errno = _pl_convert_volume_for_special_range (flock, volume, &res);
+ if (op_errno)
+ goto unwind;
+ if (res)
+ volume = res;
+
+ pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
+
+ pinode = pl_inode_get (this, inode);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ dom = get_domain (pinode, volume);
+ if (!dom) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (frame->root->lk_owner.len == 0) {
+ /*
+ special case: this means release all locks
+ from this client
+ */
+ gf_log (this->name, GF_LOG_TRACE,
+ "Releasing all locks from client %p", frame->root->client);
+
+ release_inode_locks_of_client (this, dom, inode, frame->root->client);
+ _pl_convert_volume (volume, &res1);
+ if (res1) {
+ dom = get_domain (pinode, res1);
+ if (dom)
+ release_inode_locks_of_client (this, dom,
+ inode, frame->root->client);
+ }
+
+ op_ret = 0;
+ goto unwind;
+ }
+
+ reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid,
+ frame, this, volume, conn_id);
+
+ if (!reqlock) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+
+ switch (cmd) {
+ case F_SETLKW:
+ can_block = 1;
+
+ /* fall through */
+
+ case F_SETLK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+ ret = pl_inode_setlk (this, pinode, reqlock,
+ can_block, dom);
+
+ if (ret < 0) {
+ if ((can_block) && (F_UNLCK != flock->l_type)) {
+ pl_trace_block (this, frame, fd, loc,
+ cmd, flock, volume);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
+ op_errno = -ret;
+ goto unwind;
+ }
+ break;
+
+ default:
+ op_errno = ENOTSUP;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock command F_GETLK not supported for [f]inodelk "
+ "(cmd=%d)",
+ cmd);
+ goto unwind;
+ }
+
+ op_ret = 0;
+
+ ctx = pl_ctx_get (frame->root->client, this);
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+
+ if (flock->l_type == F_UNLCK)
+ pl_del_locker (ctx->ltable, volume, loc, fd,
+ &frame->root->lk_owner,
+ GF_FOP_INODELK);
+ else
+ pl_add_locker (ctx->ltable, volume, loc, fd,
+ frame->root->pid,
+ &frame->root->lk_owner,
+ GF_FOP_INODELK);
+
+unwind:
+ if ((inode != NULL) && (flock !=NULL)) {
+ pl_update_refkeeper (this, inode);
+ pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
+ }
+
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
+out:
+ GF_FREE (res);
+ GF_FREE (res1);
+ return 0;
+}
+
+int
+pl_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock,
+ loc, NULL, xdata);
+
+ return 0;
+}
+
+int
+pl_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock,
+ NULL, fd, xdata);
+
+ return 0;
+
+}
+
+static inline int32_t
+__get_inodelk_dom_count (pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+ count++;
+ }
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ count++;
+ }
+ return count;
+}
+
+/* Returns the no. of locks (blocked/granted) held on a given domain name
+ * If @domname is NULL, returns the no. of locks in all the domains present.
+ * If @domname is non-NULL and non-existent, returns 0 */
+int32_t
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname)
+{
+ int32_t count = 0;
+ pl_dom_list_t *dom = NULL;
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (domname) {
+ if (strcmp (domname, dom->domain) == 0) {
+ count = __get_inodelk_dom_count (dom);
+ goto out;
+ }
+
+ } else {
+ /* Counting locks from all domains */
+ count += __get_inodelk_dom_count (dom);
+
+ }
+ }
+
+out:
+ return count;
+}
+
+int32_t
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
+
+ ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ count = __get_inodelk_count (this, pl_inode, domname);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+out:
+ return count;
+}
diff --git a/xlators/features/locks/src/internal.c b/xlators/features/locks/src/internal.c
deleted file mode 100644
index 6524721b4..000000000
--- a/xlators/features/locks/src/internal.c
+++ /dev/null
@@ -1,896 +0,0 @@
-/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-
-#include "locks.h"
-#include "common.h"
-
-
-static int
-release_inode_locks_of_transport (xlator_t *this,
- inode_t *inode, transport_t *trans)
-{
- posix_lock_t *tmp = NULL;
- posix_lock_t *l = NULL;
-
- pl_inode_t * pinode = NULL;
-
- struct list_head granted;
-
- char *path = NULL;
-
- INIT_LIST_HEAD (&granted);
-
- pinode = pl_inode_get (this, inode);
-
- pthread_mutex_lock (&pinode->mutex);
- {
- if (list_empty (&pinode->int_list)) {
- goto unlock;
- }
-
- list_for_each_entry_safe (l, tmp, &pinode->int_list, list) {
- if (l->transport != trans)
- continue;
-
- list_del_init (&l->list);
-
- __delete_lock (pinode, l);
-
- inode_path (inode, NULL, &path);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64"}",
- path, trans,
- (uint64_t) l->client_pid);
-
- if (path)
- FREE (path);
-
- __destroy_lock (l);
- }
- }
-unlock:
- pthread_mutex_unlock (&pinode->mutex);
-
- grant_blocked_locks (this, pinode, GF_LOCK_INTERNAL);
-
- return 0;
-}
-
-
-/**
- * pl_inodelk:
- *
- * This fop provides fcntl-style locking on files for internal
- * purposes. Locks held through this fop reside in a domain different
- * from those held by applications. This fop is for the use of AFR.
- */
-
-int
-pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- posix_locks_private_t * priv = NULL;
- transport_t * transport = NULL;
- pid_t client_pid = -1;
- pl_inode_t * pinode = NULL;
- posix_lock_t * reqlock = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (flock, out);
-
- if ((flock->l_start < 0) || (flock->l_len < 0)) {
- op_errno = EINVAL;
- goto unwind;
- }
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
-
- priv = (posix_locks_private_t *) this->private;
-
- VALIDATE_OR_GOTO (priv, out);
-
- pinode = pl_inode_get (this, loc->inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if (client_pid == 0) {
- /*
- special case: this means release all locks
- from this transport
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
-
- release_inode_locks_of_transport (this, loc->inode, transport);
- goto unwind;
- }
-
- reqlock = new_posix_lock (flock, transport, client_pid);
- if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- switch (cmd) {
- case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
-
- /* fall through */
-
- case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_setlk (this, pinode, reqlock,
- can_block, GF_LOCK_INTERNAL);
-
- if (ret == -1) {
- if (can_block)
- goto out;
-
- gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
- op_errno = EAGAIN;
- __destroy_lock (reqlock);
- goto unwind;
- }
- break;
-
- default:
- op_errno = ENOTSUP;
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in inodelk (cmd=%d). "
- "Please file a bug report at http://bugs.gluster.com",
- cmd);
- goto unwind;
- }
-
- op_ret = 0;
-
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno);
-out:
- return 0;
-}
-
-
-int
-pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- posix_locks_private_t * priv = NULL;
- transport_t * transport = NULL;
- pid_t client_pid = -1;
- pl_inode_t * pinode = NULL;
- posix_lock_t * reqlock = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (flock, out);
-
- if ((flock->l_start < 0) || (flock->l_len < 0)) {
- op_errno = EINVAL;
- goto unwind;
- }
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
-
- priv = (posix_locks_private_t *) this->private;
-
- VALIDATE_OR_GOTO (priv, out);
-
- pinode = pl_inode_get (this, fd->inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if (client_pid == 0) {
- /*
- special case: this means release all locks
- from this transport
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
-
- release_inode_locks_of_transport (this, fd->inode, transport);
- goto unwind;
- }
-
- reqlock = new_posix_lock (flock, transport, client_pid);
- if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- switch (cmd) {
- case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
- reqlock->fd = fd;
-
- /* fall through */
-
- case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_setlk (this, pinode, reqlock,
- can_block, GF_LOCK_INTERNAL);
-
- if (ret == -1) {
- if (can_block)
- goto out;
-
- gf_log (this->name, GF_LOG_TRACE, "Returning EAGAIN");
- op_errno = EAGAIN;
- __destroy_lock (reqlock);
- goto unwind;
- }
- break;
-
- default:
- op_errno = ENOTSUP;
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in finodelk (cmd=%d). "
- "Please file a bug report at http://bugs.gluster.com",
- cmd);
- goto unwind;
- }
-
- op_ret = 0;
-
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno);
-out:
- return 0;
-}
-
-
-/**
- * types_conflict - do two types of lock conflict?
- * @t1: type
- * @t2: type
- *
- * two read locks do not conflict
- * any other case conflicts
- */
-
-static int
-types_conflict (entrylk_type t1, entrylk_type t2)
-{
- return !((t1 == ENTRYLK_RDLCK) && (t2 == ENTRYLK_RDLCK));
-}
-
-/**
- * all_names - does a basename represent all names?
- * @basename: name to check
- */
-
-#define all_names(basename) ((basename == NULL) ? 1 : 0)
-
-/**
- * names_conflict - do two names conflict?
- * @n1: name
- * @n2: name
- */
-
-static int
-names_conflict (const char *n1, const char *n2)
-{
- return all_names (n1) || all_names (n2) || !strcmp (n1, n2);
-}
-
-
-static int
-names_equal (const char *n1, const char *n2)
-{
- return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2));
-}
-
-/**
- * lock_grantable - is this lock grantable?
- * @inode: inode in which to look
- * @basename: name we're trying to lock
- * @type: type of lock
- */
-
-static pl_entry_lock_t *
-__lock_grantable (pl_inode_t *pinode, const char *basename, entrylk_type type)
-{
- pl_entry_lock_t *lock = NULL;
-
- if (list_empty (&pinode->dir_list))
- return NULL;
-
- list_for_each_entry (lock, &pinode->dir_list, inode_list) {
- if (names_conflict (lock->basename, basename) &&
- types_conflict (lock->type, type))
- return lock;
- }
-
- return NULL;
-}
-
-/**
- * find_most_matching_lock - find the lock struct which most matches in order of:
- * lock on the exact basename ||
- * an all_names lock
- *
- *
- * @inode: inode in which to look
- * @basename: name to search for
- */
-
-static pl_entry_lock_t *
-__find_most_matching_lock (pl_inode_t *pinode, const char *basename)
-{
- pl_entry_lock_t *lock;
- pl_entry_lock_t *all = NULL;
- pl_entry_lock_t *exact = NULL;
-
- if (list_empty (&pinode->dir_list))
- return NULL;
-
- list_for_each_entry (lock, &pinode->dir_list, inode_list) {
- if (all_names (lock->basename))
- all = lock;
- else if (names_equal (lock->basename, basename))
- exact = lock;
- }
-
- return (exact ? exact : all);
-}
-
-
-/**
- * insert_new_lock - insert a new dir lock into the inode with the given parameters
- * @pinode: inode to insert into
- * @basename: basename for the lock
- * @type: type of the lock
- */
-
-static pl_entry_lock_t *
-new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- transport_t *trans)
-{
- pl_entry_lock_t *newlock = NULL;
-
- newlock = CALLOC (sizeof (pl_entry_lock_t), 1);
- if (!newlock) {
- goto out;
- }
-
- newlock->basename = basename ? strdup (basename) : NULL;
- newlock->type = type;
- newlock->trans = trans;
-
- if (type == ENTRYLK_RDLCK)
- newlock->read_count = 1;
-
- INIT_LIST_HEAD (&newlock->inode_list);
- INIT_LIST_HEAD (&newlock->blocked_locks);
-
-out:
- return newlock;
-}
-
-/**
- * lock_name - lock a name in a directory
- * @inode: inode for the directory in which to lock
- * @basename: name of the entry to lock
- * if null, lock the entire directory
- *
- * the entire directory being locked is represented as: a single
- * pl_entry_lock_t present in the entrylk_locks list with its
- * basename = NULL
- */
-
-int
-__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
- call_frame_t *frame, xlator_t *this, int nonblock)
-{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *conf = NULL;
-
- transport_t *trans = NULL;
-
- int ret = -EINVAL;
-
- trans = frame->root->trans;
-
- conf = __lock_grantable (pinode, basename, type);
- if (conf) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- lock = new_entrylk_lock (pinode, basename, type, trans);
-
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
-
- lock->frame = frame;
- lock->this = this;
- lock->blocked = 1;
-
- list_add (&lock->blocked_locks, &conf->blocked_locks);
-
-
- goto out;
- }
-
- switch (type) {
- case ENTRYLK_RDLCK:
- lock = __find_most_matching_lock (pinode, basename);
-
- if (lock && names_equal (lock->basename, basename)) {
- lock->read_count++;
-
- FREE (lock->basename);
- FREE (lock);
-
- lock = NULL;
- } else {
- lock = new_entrylk_lock (pinode, basename, type, trans);
-
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- list_add (&lock->inode_list, &pinode->dir_list);
- }
- break;
-
- case ENTRYLK_WRLCK:
- lock = new_entrylk_lock (pinode, basename, type, trans);
-
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- list_add (&lock->inode_list, &pinode->dir_list);
- break;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-/**
- * unlock_name - unlock a name in a directory
- * @inode: inode for the directory to unlock in
- * @basename: name of the entry to unlock
- * if null, unlock the entire directory
- */
-
-pl_entry_lock_t *
-__unlock_name (pl_inode_t *pinode, const char *basename, entrylk_type type)
-{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *ret_lock = NULL;
-
- lock = __find_most_matching_lock (pinode, basename);
-
- if (!lock) {
- gf_log ("locks", GF_LOG_DEBUG,
- "unlock on %s (type=%s) attempted but no matching lock found",
- basename, type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK");
- goto out;
- }
-
- if (names_equal (lock->basename, basename)
- && lock->type == type) {
- if (type == ENTRYLK_RDLCK) {
- lock->read_count--;
- }
- if (type == ENTRYLK_WRLCK || lock->read_count == 0) {
- list_del (&lock->inode_list);
- ret_lock = lock;
- }
- } else {
- gf_log ("locks", GF_LOG_DEBUG,
- "Unlock for a non-existing lock!");
- goto out;
- }
-
-out:
- return ret_lock;
-}
-
-
-void
-__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *lock,
- struct list_head *granted)
-{
- int bl_ret = 0;
- pl_entry_lock_t *bl = NULL;
- pl_entry_lock_t *tmp = NULL;
-
- list_for_each_entry_safe (bl, tmp, &lock->blocked_locks,
- blocked_locks) {
- list_del_init (&bl->blocked_locks);
-
- /* TODO: error checking */
-
- gf_log ("locks", GF_LOG_TRACE,
- "Trying to unblock: {pinode=%p, basename=%s}",
- pl_inode, bl->basename);
-
- bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
- bl->frame, bl->this, 0);
-
- if (bl_ret == 0) {
- list_add (&bl->blocked_locks, granted);
- } else {
- if (bl->basename)
- FREE (bl->basename);
- FREE (bl);
- }
- }
- return;
-}
-
-
-void
-grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked)
-{
- struct list_head granted_list;
- pl_entry_lock_t *tmp = NULL;
- pl_entry_lock_t *lock = NULL;
-
- INIT_LIST_HEAD (&granted_list);
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_entry_locks (this, pl_inode, unlocked,
- &granted_list);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND (lock->frame, 0, 0);
-
- FREE (lock->basename);
- FREE (lock);
- }
-
- FREE (unlocked->basename);
- FREE (unlocked);
-
- return;
-}
-
-
-/**
- * release_entry_locks_for_transport: release all entry locks from this
- * transport for this loc_t
- */
-
-static int
-release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
- transport_t *trans)
-{
- pl_entry_lock_t *lock;
- pl_entry_lock_t *tmp;
- struct list_head granted;
-
- INIT_LIST_HEAD (&granted);
-
- pthread_mutex_lock (&pinode->mutex);
- {
- if (list_empty (&pinode->dir_list)) {
- goto unlock;
- }
-
- list_for_each_entry_safe (lock, tmp, &pinode->dir_list,
- inode_list) {
- if (lock->trans != trans)
- continue;
-
- list_del_init (&lock->inode_list);
- __grant_blocked_entry_locks (this, pinode, lock,
- &granted);
-
- FREE (lock->basename);
- FREE (lock);
- }
- }
-unlock:
- pthread_mutex_unlock (&pinode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND (lock->frame, 0, 0);
-
- FREE (lock->basename);
- FREE (lock);
- }
-
- return 0;
-}
-
-
-/**
- * pl_entrylk:
- *
- * Locking on names (directory entries)
- */
-
-int
-pl_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- transport_t * transport = NULL;
- pid_t pid = -1;
-
- pl_inode_t * pinode = NULL;
- int ret = -1;
- pl_entry_lock_t *unlocked = NULL;
- char unwind = 1;
-
-
- pinode = pl_inode_get (this, loc->inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- pid = frame->root->pid;
- transport = frame->root->trans;
-
- if (pid == 0) {
- /*
- this is a special case that means release
- all locks from this transport
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for transport %p", transport);
-
- release_entry_locks_for_transport (this, pinode, transport);
- op_ret = 0;
-
- goto out;
- }
-
- switch (cmd) {
- case ENTRYLK_LOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, this, 0);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- if (ret == -EAGAIN)
- unwind = 0;
- op_errno = -ret;
- goto out;
- }
-
- break;
-
- case ENTRYLK_LOCK_NB:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, this, 1);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- break;
-
- case ENTRYLK_UNLOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- unlocked = __unlock_name (pinode, basename, type);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (unlocked)
- grant_blocked_entry_locks (this, pinode, unlocked);
-
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in entrylk (cmd=%d). Please file"
- "a bug report at http://bugs.gluster.com", cmd);
- goto out;
- }
-
- op_ret = 0;
-out:
- if (unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- }
-
- return 0;
-}
-
-
-/**
- * pl_entrylk:
- *
- * Locking on names (directory entries)
- */
-
-int
-pl_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- transport_t * transport = NULL;
- pid_t pid = -1;
-
- pl_inode_t * pinode = NULL;
- int ret = -1;
- pl_entry_lock_t *unlocked = NULL;
- char unwind = 1;
-
- pinode = pl_inode_get (this, fd->inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory :(");
- goto out;
- }
-
- pid = frame->root->pid;
- transport = frame->root->trans;
-
- if (pid == 0) {
- /*
- this is a special case that means release
- all locks from this transport
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for transport %p", transport);
-
- release_entry_locks_for_transport (this, pinode, transport);
- op_ret = 0;
- goto out;
- }
-
- switch (cmd) {
- case ENTRYLK_LOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, this, 0);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- if (ret == -EAGAIN)
- unwind = 0;
- op_errno = -ret;
- goto out;
- }
- break;
-
- case ENTRYLK_LOCK_NB:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, this, 1);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- break;
-
- case ENTRYLK_UNLOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- unlocked = __unlock_name (pinode, basename, type);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (unlocked)
- grant_blocked_entry_locks (this, pinode, unlocked);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in fentrylk (cmd=%d). "
- "Please file a bug report at http://bugs.gluster.com",
- cmd);
- goto out;
- }
-
- op_ret = 0;
-out:
- if (unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- }
-
- return 0;
-}
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
new file mode 100644
index 000000000..08aeb0a79
--- /dev/null
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LOCKS_MEM_TYPES_H__
+#define __LOCKS_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_locks_mem_types_ {
+ gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1,
+ gf_locks_mt_pl_inode_t,
+ gf_locks_mt_posix_lock_t,
+ gf_locks_mt_pl_entry_lock_t,
+ gf_locks_mt_pl_inode_lock_t,
+ gf_locks_mt_truncate_ops,
+ gf_locks_mt_pl_rw_req_t,
+ gf_locks_mt_posix_locks_private_t,
+ gf_locks_mt_pl_fdctx_t,
+ gf_locks_mt_end
+};
+#endif
+
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 5a834657d..76fc941d7 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
@@ -26,86 +16,177 @@
#endif
#include "compat-errno.h"
-#include "transport.h"
#include "stack.h"
#include "call-stub.h"
+#include "locks-mem-types.h"
+#include "client_t.h"
+
+#include "lkowner.h"
struct __pl_fd;
struct __posix_lock {
- struct list_head list;
+ struct list_head list;
+
+ short fl_type;
+ off_t fl_start;
+ off_t fl_end;
- short fl_type;
- off_t fl_start;
- off_t fl_end;
+ short blocked; /* waiting to acquire */
+ struct gf_flock user_flock; /* the flock supplied by the user */
+ xlator_t *this; /* required for blocked locks */
+ unsigned long fd_num;
- short blocked; /* waiting to acquire */
- struct flock user_flock; /* the flock supplied by the user */
- xlator_t *this; /* required for blocked locks */
- fd_t *fd;
+ fd_t *fd;
+ call_frame_t *frame;
- call_frame_t *frame;
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
- /* These two together serve to uniquely identify each process
- across nodes */
+ /* These two together serve to uniquely identify each process
+ across nodes */
- transport_t *transport; /* to identify client node */
- pid_t client_pid; /* pid of client process */
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
};
typedef struct __posix_lock posix_lock_t;
+struct __pl_inode_lock {
+ struct list_head list;
+ struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
+ int ref;
+
+ short fl_type;
+ off_t fl_start;
+ off_t fl_end;
+
+ const char *volume;
+
+ struct gf_flock user_flock; /* the flock supplied by the user */
+ xlator_t *this; /* required for blocked locks */
+ fd_t *fd;
+
+ call_frame_t *frame;
+
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
+ /* These two together serve to uniquely identify each process
+ across nodes */
+
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ char *connection_id; /* stores the client connection id */
+};
+typedef struct __pl_inode_lock pl_inode_lock_t;
+
struct __pl_rw_req_t {
- struct list_head list;
- call_stub_t *stub;
- posix_lock_t region;
+ struct list_head list;
+ call_stub_t *stub;
+ posix_lock_t region;
};
typedef struct __pl_rw_req_t pl_rw_req_t;
+struct __pl_dom_list_t {
+ struct list_head inode_list; /* list_head back to pl_inode_t */
+ const char *domain;
+ struct list_head entrylk_list; /* List of entry locks */
+ struct list_head blocked_entrylks; /* List of all blocked entrylks */
+ struct list_head inodelk_list; /* List of inode locks */
+ struct list_head blocked_inodelks; /* List of all blocked inodelks */
+};
+typedef struct __pl_dom_list_t pl_dom_list_t;
struct __entry_lock {
- struct list_head inode_list; /* list_head back to pl_inode_t */
- struct list_head blocked_locks; /* locks blocked due to this lock */
-
- call_frame_t *frame;
- xlator_t *this;
- int blocked;
-
- const char *basename;
- entrylk_type type;
- unsigned int read_count; /* number of read locks */
- transport_t *trans;
+ struct list_head domain_list; /* list_head back to pl_dom_list_t */
+ struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+
+ call_frame_t *frame;
+ xlator_t *this;
+
+ const char *volume;
+
+ const char *basename;
+ entrylk_type type;
+
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
+ void *trans;
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ char *connection_id; /* stores the client connection id */
};
typedef struct __entry_lock pl_entry_lock_t;
-/* The "simulated" inode. This contains a list of all the locks associated
+/* The "simulated" inode. This contains a list of all the locks associated
with this file */
struct __pl_inode {
- pthread_mutex_t mutex;
-
- struct list_head dir_list; /* list of entry locks */
- struct list_head ext_list; /* list of fcntl locks */
- struct list_head int_list; /* list of internal locks */
- struct list_head rw_list; /* list of waiting r/w requests */
- int mandatory; /* if mandatory locking is enabled */
+ pthread_mutex_t mutex;
+
+ struct list_head dom_list; /* list of domains */
+ struct list_head ext_list; /* list of fcntl locks */
+ struct list_head rw_list; /* list of waiting r/w requests */
+ struct list_head reservelk_list; /* list of reservelks */
+ struct list_head blocked_reservelks; /* list of blocked reservelks */
+ struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/
+ int mandatory; /* if mandatory locking is enabled */
+
+ inode_t *refkeeper; /* hold refs on an inode while locks are
+ held to prevent pruning */
};
typedef struct __pl_inode pl_inode_t;
-#define LOCKS_FOR_DOMAIN(inode,domain) (domain == GF_LOCK_POSIX \
- ? inode->fcntl_locks \
- : inode->inodelk_locks)
-
struct __pl_fd {
- gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
+ gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
};
typedef struct __pl_fd pl_fd_t;
typedef struct {
- gf_boolean_t mandatory; /* if mandatory locking is enabled */
+ gf_boolean_t mandatory; /* if mandatory locking is enabled */
+ gf_boolean_t trace; /* trace lock requests in and out */
+ char *brickname;
} posix_locks_private_t;
+typedef struct {
+ gf_boolean_t entrylk_count_req;
+ gf_boolean_t inodelk_count_req;
+ gf_boolean_t inodelk_dom_count_req;
+ gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+
+ /* used by {f,}truncate */
+ loc_t loc;
+ fd_t *fd;
+ off_t offset;
+ dict_t *xdata;
+ enum {TRUNCATE, FTRUNCATE} op;
+} pl_local_t;
+
+
+typedef struct {
+ struct list_head locks_list;
+} pl_fdctx_t;
+
+
+typedef struct _locks_ctx {
+ gf_lock_t ltable_lock; /* only for replace,
+ ltable has its own internal
+ lock for operations */
+ struct _lock_table *ltable;
+} pl_ctx_t;
+
+
+pl_ctx_t *
+pl_ctx_get (client_t *client, xlator_t *xlator);
+
#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 7389e1e6a..7bfb38a51 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
@@ -37,6 +27,9 @@
#include "locks.h"
#include "common.h"
#include "statedump.h"
+#include "clear.h"
+#include "defaults.h"
+#include "syncop.h"
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -47,887 +40,2733 @@
void do_blocked_rw (pl_inode_t *);
static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
+static int format_brickname(char *);
+int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *);
+static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
-struct _truncate_ops {
- loc_t loc;
- fd_t *fd;
- off_t offset;
- enum {TRUNCATE, FTRUNCATE} op;
-};
+static pl_fdctx_t *
+pl_new_fdctx ()
+{
+ pl_fdctx_t *fdctx = NULL;
+
+ fdctx = GF_CALLOC (1, sizeof (*fdctx),
+ gf_locks_mt_pl_fdctx_t);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fdctx, out);
+
+ INIT_LIST_HEAD (&fdctx->locks_list);
+
+out:
+ return fdctx;
+}
+
+static pl_fdctx_t *
+pl_check_n_create_fdctx (xlator_t *this, fd_t *fd)
+{
+ int ret = 0;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("posix-locks", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+ LOCK (&fd->lock);
+ {
+ ret = __fd_ctx_get (fd, this, &tmp);
+ if ((ret != 0) || (tmp == 0)) {
+ fdctx = pl_new_fdctx ();
+ if (fdctx == NULL) {
+ goto unlock;
+ }
+ }
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long)fdctx);
+ if (ret != 0) {
+ GF_FREE (fdctx);
+ fdctx = NULL;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx");
+ }
+ }
+unlock:
+ UNLOCK (&fd->lock);
+out:
+ return fdctx;
+}
int
pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ if (local->op == TRUNCATE)
+ loc_wipe (&local->loc);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
+
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
}
static int
-truncate_allowed (pl_inode_t *pl_inode,
- transport_t *transport, pid_t client_pid,
- off_t offset)
+truncate_allowed (pl_inode_t *pl_inode,
+ client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, off_t offset)
{
- posix_lock_t *l = NULL;
- posix_lock_t region = {.list = {0, }, };
- int ret = 1;
-
- region.fl_start = offset;
- region.fl_end = LLONG_MAX;
- region.transport = transport;
- region.client_pid = client_pid;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (!l->blocked
- && locks_overlap (&region, l)
- && !same_owner (&region, l)) {
- ret = 0;
- break;
- }
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ posix_lock_t *l = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ int ret = 1;
+
+ region.fl_start = offset;
+ region.fl_end = LLONG_MAX;
+ region.client = client;
+ region.client_pid = client_pid;
+ region.owner = *owner;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (!l->blocked
+ && locks_overlap (&region, l)
+ && !same_owner (&region, l)) {
+ ret = 0;
+ gf_log ("posix-locks", GF_LOG_TRACE, "Truncate "
+ "allowed");
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- return ret;
+ return ret;
}
static int
truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
- struct _truncate_ops *local = NULL;
- inode_t *inode = NULL;
- pl_inode_t *pl_inode = NULL;
+ posix_locks_private_t *priv = NULL;
+ pl_local_t *local = NULL;
+ inode_t *inode = NULL;
+ pl_inode_t *pl_inode = NULL;
- priv = this->private;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "got error (errno=%d, stderror=%s) from child",
- op_errno, strerror (op_errno));
- goto unwind;
- }
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got error (errno=%d, stderror=%s) from child",
+ op_errno, strerror (op_errno));
+ goto unwind;
+ }
- if (local->op == TRUNCATE)
- inode = local->loc.inode;
- else
- inode = local->fd->inode;
+ if (local->op == TRUNCATE)
+ inode = local->loc.inode;
+ else
+ inode = local->fd->inode;
- pl_inode = pl_inode_get (this, inode);
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ pl_inode = pl_inode_get (this, inode);
+ if (!pl_inode) {
op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if (priv->mandatory
- && pl_inode->mandatory
- && !truncate_allowed (pl_inode, frame->root->trans,
- frame->root->pid, local->offset)) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (priv->mandatory
+ && pl_inode->mandatory
+ && !truncate_allowed (pl_inode, frame->root->client,
+ frame->root->pid, &frame->root->lk_owner,
+ local->offset)) {
op_ret = -1;
- op_errno = EAGAIN;
- goto unwind;
- }
-
- switch (local->op) {
- case TRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset);
- break;
- case FTRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset);
- break;
- }
-
- return 0;
+ op_errno = EAGAIN;
+ goto unwind;
+ }
-unwind:
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ switch (local->op) {
+ case TRUNCATE:
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ &local->loc, local->offset, local->xdata);
+ break;
+ case FTRUNCATE:
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate,
+ local->fd, local->offset, local->xdata);
+ break;
+ }
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ return 0;
+
+unwind:
+ gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, "
+ "error: %s", op_ret, strerror (op_errno));
+ if (local->op == TRUNCATE)
+ loc_wipe (&local->loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
+
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata);
+ return 0;
}
int
pl_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = CALLOC (1, sizeof (struct _truncate_ops));
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto unwind;
- }
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, unwind);
- local->op = TRUNCATE;
- local->offset = offset;
- loc_copy (&local->loc, loc);
+ local->op = TRUNCATE;
+ local->offset = offset;
+ loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
- frame->local = local;
+ frame->local = local;
- STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->stat, loc);
+ STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->stat, loc, NULL);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, "
+ "error: %s", loc->path, -1, strerror (ENOMEM));
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
pl_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = CALLOC (1, sizeof (struct _truncate_ops));
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto unwind;
- }
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, unwind);
- local->op = FTRUNCATE;
- local->offset = offset;
- local->fd = fd;
+ local->op = FTRUNCATE;
+ local->offset = offset;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
- frame->local = local;
+ frame->local = local;
- STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
- return 0;
+ STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, "
+ "error: %s", -1, strerror (ENOMEM));
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
+int
+pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *l = NULL;
+ int found = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ found = 1;
+ break;
+ }
+ }
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return found;
+}
+
+static void
+delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *l = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ if (l->blocked) {
+ list_move_tail (&l->list, &blocked_list);
+ continue;
+ }
+ __delete_lock (pl_inode, l);
+ __destroy_lock (l);
+ }
+ }
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (l, tmp, &blocked_list, list) {
+ list_del_init(&l->list);
+ STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock,
+ NULL);
+ __destroy_lock (l);
+ }
+
+ grant_blocked_locks (this, pl_inode);
+
+ do_blocked_rw (pl_inode);
+
+}
static void
__delete_locks_of_owner (pl_inode_t *pl_inode,
- transport_t *transport, pid_t pid)
+ client_t *client, gf_lkowner_t *owner)
{
- posix_lock_t *tmp = NULL;
- posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *l = NULL;
+
+ /* TODO: what if it is a blocked lock with pending l->frame */
+
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if (l->blocked)
+ continue;
+ if ((l->client == client) &&
+ is_same_lkowner (&l->owner, owner)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ " Flushing lock"
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ l->client_pid,
+ lkowner_utoa (&l->owner),
+ l->user_flock.l_start,
+ l->user_flock.l_len,
+ l->blocked == 1 ? "Blocked" : "Active");
+
+ __delete_lock (pl_inode, l);
+ __destroy_lock (l);
+ }
+ }
- /* TODO: what if it is a blocked lock with pending l->frame */
+ return;
+}
- list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if ((l->transport == transport)
- && (l->client_pid == pid)) {
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
- }
- list_for_each_entry_safe (l, tmp, &pl_inode->int_list, list) {
- if ((l->transport == transport)
- && (l->client_pid == pid)) {
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
- }
+int32_t
+pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
- return;
+}
+
+int32_t
+pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int op_ret = -1;
+ int32_t bcount = 0;
+ int32_t gcount = 0;
+ char key[PATH_MAX] = {0, };
+ char *lk_summary = NULL;
+ pl_inode_t *pl_inode = NULL;
+ dict_t *dict = NULL;
+ clrlk_args args = {0,};
+ char *brickname = NULL;
+
+ if (!name)
+ goto usual;
+
+ if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD)))
+ goto usual;
+
+ if (clrlk_parse_args (name, &args)) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ pl_inode = pl_inode_get (this, loc->inode);
+ if (!pl_inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ switch (args.type) {
+ case CLRLK_INODE:
+ case CLRLK_ENTRY:
+ op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode,
+ &args, &bcount,
+ &gcount,
+ &op_errno);
+ if (op_ret)
+ goto out;
+ break;
+ case CLRLK_POSIX:
+ op_ret = clrlk_clear_posixlk (this, pl_inode, &args,
+ &bcount, &gcount,
+ &op_errno);
+ if (op_ret)
+ goto out;
+ break;
+ case CLRLK_TYPE_MAX:
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't get brickname");
+ } else {
+ op_ret = format_brickname(brickname);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't format brickname");
+ GF_FREE(brickname);
+ brickname = NULL;
+ }
+ }
+
+ if (!gcount && !bcount) {
+ if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d "
+ "granted locks=%d",
+ (brickname == NULL)? this->name : brickname,
+ (args.type == CLRLK_INODE)? "inode":
+ (args.type == CLRLK_ENTRY)? "entry":
+ (args.type == CLRLK_POSIX)? "posix": " ",
+ bcount, gcount) == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ strncpy (key, name, strlen (name));
+ if (dict_set_dynstr (dict, key, lk_summary)) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ GF_FREE(brickname);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ GF_FREE (args.opts);
+ if (op_ret && lk_summary)
+ GF_FREE (lk_summary);
+ if (dict)
+ dict_unref (dict);
+ return 0;
+
+usual:
+ STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
+
+static int
+format_brickname(char *brickname)
+{
+ int ret = -1;
+ char *hostname = NULL;
+ char *volume = NULL;
+ char *saveptr = NULL;
+
+ if (!brickname)
+ goto out;
+
+ strtok_r(brickname, ":", &saveptr);
+ hostname = gf_strdup(strtok_r(NULL, ":", &saveptr));
+ if (hostname == NULL)
+ goto out;
+ volume = gf_strdup(strtok_r(NULL, ".", &saveptr));
+ if (volume == NULL)
+ goto out;
+
+ sprintf(brickname, "%s:%s", hostname, volume);
+
+ ret = 0;
+out:
+ GF_FREE(hostname);
+ GF_FREE(volume);
+ return ret;
+}
+
+static int
+fetch_pathinfo (xlator_t *this, inode_t *inode, int32_t *op_errno,
+ char **brickname)
+{
+ int ret = -1;
+ loc_t loc = {0, };
+ dict_t *dict = NULL;
+
+ if (!brickname)
+ goto out;
+
+ if (!op_errno)
+ goto out;
+
+ uuid_copy (loc.gfid, inode->gfid);
+ loc.inode = inode_ref (inode);
+
+ ret = syncop_getxattr (FIRST_CHILD(this), &loc, &dict,
+ GF_XATTR_PATHINFO_KEY);
+ if (ret < 0) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, brickname);
+ if (ret)
+ goto out;
+
+ *brickname = gf_strdup(*brickname);
+ if (*brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+ loc_wipe(&loc);
+
+ return ret;
}
int
+pl_lockinfo_get_brickname (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ int ret = -1;
+ posix_locks_private_t *priv = NULL;
+ char *brickname = NULL;
+ char *end = NULL;
+ char *tmp = NULL;
+
+ priv = this->private;
+
+ ret = fetch_pathinfo (this, inode, op_errno, &brickname);
+ if (ret)
+ goto out;
+
+ end = strrchr (brickname, ':');
+ if (!end) {
+ GF_FREE(brickname);
+ ret = -1;
+ goto out;
+ }
+
+ tmp = brickname;
+ brickname = gf_strndup (brickname, (end - brickname));
+ if (brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->brickname = brickname;
+ ret = 0;
+out:
+ GF_FREE(tmp);
+ return ret;
+}
+
+char *
+pl_lockinfo_key (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ posix_locks_private_t *priv = NULL;
+ char *key = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (priv->brickname == NULL) {
+ ret = pl_lockinfo_get_brickname (this, inode, op_errno);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot get brickname");
+ goto out;
+ }
+ }
+
+ key = priv->brickname;
+out:
+ return key;
+}
+
+int32_t
+pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t *op_errno)
+{
+ pl_inode_t *pl_inode = NULL;
+ char *key = NULL, *buf = NULL;
+ int32_t op_ret = 0;
+ unsigned long fdnum = 0, len = 0;
+ dict_t *tmp = NULL;
+
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (!pl_inode) {
+ gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
+ *op_errno = EBADFD;
+ op_ret = -1;
+ goto out;
+ }
+
+ if (!pl_locks_by_fd (pl_inode, fd)) {
+ op_ret = 0;
+ goto out;
+ }
+
+ fdnum = fd_to_fdnum (fd);
+
+ key = pl_lockinfo_key (this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ tmp = dict_new ();
+ if (tmp == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_set_uint64 (tmp, key, fdnum);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa (fd->inode->gfid),
+ strerror (*op_errno));
+ goto out;
+ }
+
+ len = dict_serialized_length (tmp);
+ if (len < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialized_length failed (%s) while handling "
+ "lockinfo for fd (ptr:%p inode-gfid:%s)",
+ strerror (*op_errno), fd, uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (buf == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_serialize (tmp, buf);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialize failed (%s) while handling lockinfo "
+ "for fd (ptr: %p inode-gfid:%s)", strerror (*op_errno),
+ fd, uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ op_ret = dict_set_dynptr (dict, GF_XATTR_LOCKINFO_KEY, buf, len);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa (fd->inode->gfid),
+ strerror (*op_errno));
+ goto out;
+ }
+
+ buf = NULL;
+out:
+ if (tmp != NULL) {
+ dict_unref (tmp);
+ }
+
+ if (buf != NULL) {
+ GF_FREE (buf);
+ }
+
+ return op_ret;
+}
+
+
+int32_t
+pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_ret = 0, op_errno = 0;
+ dict_t *dict = NULL;
+
+ if (!name) {
+ goto usual;
+ }
+
+ if (strcmp (name, GF_XATTR_LOCKINFO_KEY) == 0) {
+ dict = dict_new ();
+ if (dict == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = pl_fgetxattr_handle_lockinfo (this, fd, dict,
+ &op_errno);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting lockinfo on fd (ptr:%p inode-gfid:%s) "
+ "failed (%s)", fd, uuid_utoa (fd->inode->gfid),
+ strerror (op_errno));
+ }
+
+ goto unwind;
+ } else {
+ goto usual;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+
+ return 0;
+
+usual:
+ STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+pl_migrate_locks (call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
+ int32_t *op_errno)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t newfd_num = 0;
+ posix_lock_t *l = NULL;
+ int32_t op_ret = 0;
+
+ newfd_num = fd_to_fdnum (newfd);
+
+ pl_inode = pl_inode_get (frame->this, newfd->inode);
+ if (pl_inode == NULL) {
+ op_ret = -1;
+ *op_errno = EBADFD;
+ goto out;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->fd_num == oldfd_num) {
+ l->fd_num = newfd_num;
+ l->client = frame->root->client;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ op_ret = 0;
+out:
+ return op_ret;
+}
+
+int32_t
+pl_fsetxattr_handle_lockinfo (call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
+ int len, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ dict_t *lockinfo = NULL;
+ uint64_t oldfd_num = 0;
+ char *key = NULL;
+
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_unserialize (lockinfo_buf, len, &lockinfo);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ key = pl_lockinfo_key (frame->this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ op_ret = dict_get_uint64 (lockinfo, key, &oldfd_num);
+
+ if (oldfd_num == 0) {
+ op_ret = 0;
+ goto out;
+ }
+
+ op_ret = pl_migrate_locks (frame, fd, oldfd_num, op_errno);
+ if (op_ret < 0) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "migration of locks from oldfd (ptr:%p) to newfd "
+ "(ptr:%p) (inode-gfid:%s)", (void *)oldfd_num, fd,
+ uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+out:
+ dict_unref (lockinfo);
+
+ return op_ret;
+}
+
+int32_t
+pl_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = 0, op_errno = 0;
+ void *lockinfo_buf = NULL;
+ int len = 0;
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ &lockinfo_buf, &len);
+ if (lockinfo_buf == NULL) {
+ goto usual;
+ }
+
+ op_ret = pl_fsetxattr_handle_lockinfo (frame, fd, lockinfo_buf, len,
+ &op_errno);
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+usual:
+ STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int32_t
+pl_opendir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ pl_fdctx_t *fdctx = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (opendir,
+ frame,
+ op_ret,
+ op_errno,
+ fd, xdata);
+ return 0;
+}
+
+int32_t
+pl_opendir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND (frame,
+ pl_opendir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+
+}
+
+int
pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
int
pl_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
+ pl_inode_t *pl_inode = NULL;
- priv = this->private;
+ pl_inode = pl_inode_get (this, fd->inode);
- pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
+ STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL);
+ return 0;
+ }
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
+ pl_trace_flush (this, frame, fd);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __delete_locks_of_owner (pl_inode, frame->root->trans,
- frame->root->pid);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ if (frame->root->lk_owner.len == 0) {
+ /* Handle special case when protocol/server sets lk-owner to zero.
+ * This usually happens due to a client disconnection. Hence, free
+ * all locks opened with this fd.
+ */
+ gf_log (this->name, GF_LOG_TRACE,
+ "Releasing all locks with fd %p", fd);
+ delete_locks_of_fd (this, pl_inode, fd);
+ goto wind;
- grant_blocked_locks (this, pl_inode, GF_LOCK_POSIX);
- grant_blocked_locks (this, pl_inode, GF_LOCK_INTERNAL);
+ }
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __delete_locks_of_owner (pl_inode, frame->root->client,
+ &frame->root->lk_owner);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- do_blocked_rw (pl_inode);
+ grant_blocked_locks (this, pl_inode);
- STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd);
- return 0;
+ do_blocked_rw (pl_inode);
+
+wind:
+ STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
}
int
pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ pl_fdctx_t *fdctx = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
int
-pl_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd)
+pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- /* why isn't O_TRUNC being handled ? */
- STACK_WIND (frame, pl_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- loc, flags & ~O_TRUNC, fd);
+ STACK_WIND (frame, pl_open_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
- return 0;
+ return 0;
}
int
pl_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct stat *buf)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+ pl_fdctx_t *fdctx = NULL;
- return 0;
+ if (op_ret < 0)
+ goto unwind;
+
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+
+ return 0;
}
int
pl_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
- STACK_WIND (frame, pl_create_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+ STACK_WIND (frame, pl_create_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
}
int
pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct stat *stbuf,
- struct iobref *iobref)
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
- return 0;
+ return 0;
}
int
-pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- return 0;
+ return 0;
}
void
do_blocked_rw (pl_inode_t *pl_inode)
{
- struct list_head wind_list;
- pl_rw_req_t *rw = NULL;
- pl_rw_req_t *tmp = NULL;
-
- INIT_LIST_HEAD (&wind_list);
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) {
- if (__rw_allowable (pl_inode, &rw->region,
- rw->stub->fop)) {
- list_del_init (&rw->list);
- list_add_tail (&rw->list, &wind_list);
- }
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ struct list_head wind_list;
+ pl_rw_req_t *rw = NULL;
+ pl_rw_req_t *tmp = NULL;
+
+ INIT_LIST_HEAD (&wind_list);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) {
+ if (__rw_allowable (pl_inode, &rw->region,
+ rw->stub->fop)) {
+ list_del_init (&rw->list);
+ list_add_tail (&rw->list, &wind_list);
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- list_for_each_entry_safe (rw, tmp, &wind_list, list) {
- list_del_init (&rw->list);
- call_resume (rw->stub);
- free (rw);
- }
+ list_for_each_entry_safe (rw, tmp, &wind_list, list) {
+ list_del_init (&rw->list);
+ call_resume (rw->stub);
+ GF_FREE (rw);
+ }
- return;
+ return;
}
static int
__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
- glusterfs_fop_t op)
+ glusterfs_fop_t op)
{
- posix_lock_t *l = NULL;
- int ret = 1;
-
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (locks_overlap (l, region) && !same_owner (l, region)) {
- if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK))
- continue;
- ret = 0;
- break;
- }
- }
+ posix_lock_t *l = NULL;
+ int ret = 1;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (locks_overlap (l, region) && !same_owner (l, region)) {
+ if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK))
+ continue;
+ ret = 0;
+ break;
+ }
+ }
- return ret;
+ return ret;
}
int
-pl_readv_cont (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, pl_readv_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ STACK_WIND (frame, pl_readv_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+ fd, size, offset, flags, xdata);
- return 0;
+ return 0;
}
int
pl_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
-{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- pl_rw_req_t *rw = NULL;
- posix_lock_t region = {.list = {0, }, };
- int op_ret = 0;
- int op_errno = 0;
- char wind_needed = 1;
-
-
- priv = this->private;
- pl_inode = pl_inode_get (this, fd->inode);
-
- if (priv->mandatory && pl_inode->mandatory) {
- region.fl_start = offset;
- region.fl_end = offset + size - 1;
- region.transport = frame->root->trans;
- region.client_pid = frame->root->pid;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- wind_needed = __rw_allowable (pl_inode, &region,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ int op_ret = 0;
+ int op_errno = 0;
+ char wind_needed = 1;
+
+
+ priv = this->private;
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (priv->mandatory && pl_inode->mandatory) {
+ region.fl_start = offset;
+ region.fl_end = offset + size - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ wind_needed = __rw_allowable (pl_inode, &region,
GF_FOP_READ);
- if (wind_needed) {
- goto unlock;
+ if (wind_needed) {
+ goto unlock;
+ }
+
+ if (fd->flags & O_NONBLOCK) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "returning EAGAIN as fd is O_NONBLOCK");
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_CALLOC (1, sizeof (*rw),
+ gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw->stub = fop_readv_stub (frame, pl_readv_cont,
+ fd, size, offset, flags,
+ xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE (rw);
+ goto unlock;
}
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN as fd is O_NONBLOCK");
- op_errno = EAGAIN;
- op_ret = -1;
- goto unlock;
- }
-
- rw = CALLOC (1, sizeof (*rw));
- if (!rw) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- op_ret = -1;
- goto unlock;
- }
-
- rw->stub = fop_readv_stub (frame, pl_readv_cont,
- fd, size, offset);
- if (!rw->stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- op_ret = -1;
- free (rw);
- goto unlock;
- }
-
- rw->region = region;
-
- list_add_tail (&rw->list, &pl_inode->rw_list);
- }
- unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
- }
+ rw->region = region;
+
+ list_add_tail (&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
if (wind_needed) {
- STACK_WIND (frame, pl_readv_cbk,
+ STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
}
- if (op_ret == -1)
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
+ if (op_ret == -1)
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno,
+ NULL, 0, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int count, off_t offset,
- struct iobref *iobref)
+ struct iovec *vector, int count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- STACK_WIND (frame, pl_writev_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ STACK_WIND (frame, pl_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
- return 0;
+ return 0;
}
int
pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
-{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- pl_rw_req_t *rw = NULL;
- posix_lock_t region = {.list = {0, }, };
- int op_ret = 0;
- int op_errno = 0;
- char wind_needed = 1;
-
-
- priv = this->private;
- pl_inode = pl_inode_get (this, fd->inode);
-
- if (priv->mandatory && pl_inode->mandatory) {
- region.fl_start = offset;
- region.fl_end = offset + iov_length (vector, count) - 1;
- region.transport = frame->root->trans;
- region.client_pid = frame->root->pid;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- wind_needed = __rw_allowable (pl_inode, &region,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ int op_ret = 0;
+ int op_errno = 0;
+ char wind_needed = 1;
+
+ priv = this->private;
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (priv->mandatory && pl_inode->mandatory) {
+ region.fl_start = offset;
+ region.fl_end = offset + iov_length (vector, count) - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ wind_needed = __rw_allowable (pl_inode, &region,
GF_FOP_WRITE);
- if (wind_needed)
- goto unlock;
+ if (wind_needed)
+ goto unlock;
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN because fd is "
+ if (fd->flags & O_NONBLOCK) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "returning EAGAIN because fd is "
"O_NONBLOCK");
- op_errno = EAGAIN;
- op_ret = -1;
- goto unlock;
- }
-
- rw = CALLOC (1, sizeof (*rw));
- if (!rw) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- op_ret = -1;
- goto unlock;
- }
-
- rw->stub = fop_writev_stub (frame, pl_writev_cont,
- fd, vector, count, offset,
- iobref);
- if (!rw->stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- op_ret = -1;
- free (rw);
- goto unlock;
- }
-
- rw->region = region;
-
- list_add_tail (&rw->list, &pl_inode->rw_list);
- }
- unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
- }
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_CALLOC (1, sizeof (*rw),
+ gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw->stub = fop_writev_stub (frame, pl_writev_cont,
+ fd, vector, count, offset,
+ flags, iobref, xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE (rw);
+ goto unlock;
+ }
+
+ rw->region = region;
+
+ list_add_tail (&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
if (wind_needed)
- STACK_WIND (frame, pl_writev_cbk,
+ STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
+
+ if (op_ret == -1)
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+static int
+__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd)
+{
+ int found = 0;
+ posix_lock_t *l = NULL;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ found = 1;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static posix_lock_t *
+lock_dup (posix_lock_t *lock)
+{
+ posix_lock_t *new_lock = NULL;
+
+ new_lock = new_posix_lock (&lock->user_flock, lock->client,
+ lock->client_pid, &lock->owner,
+ (fd_t *)lock->fd_num);
+ return new_lock;
+}
+
+static int
+__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
+ pl_fdctx_t *fdctx)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *duplock = NULL;
+ int ret = 0;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ duplock = lock_dup (l);
+ if (!duplock) {
+ ret = -1;
+ break;
+ }
+
+ list_add_tail (&duplock->list, &fdctx->locks_list);
+ }
+ }
+
+ return ret;
+}
+
+static int
+__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
+ pl_fdctx_t *fdctx)
+{
+ int ret = 0;
+
+ ret = __dup_locks_to_fdctx (pl_inode, fd, fdctx);
+ if (ret)
+ goto out;
- if (op_ret == -1)
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+out:
+ return ret;
+
+}
- return 0;
+static void
+pl_mark_eol_lock (posix_lock_t *lock)
+{
+ lock->user_flock.l_type = GF_LK_EOL;
+ return;
}
+static posix_lock_t *
+__get_next_fdctx_lock (pl_fdctx_t *fdctx)
+{
+ posix_lock_t *lock = NULL;
+
+ GF_ASSERT (fdctx);
+
+ if (list_empty (&fdctx->locks_list)) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "fdctx lock list empty");
+ goto out;
+ }
+
+ lock = list_entry (fdctx->locks_list.next, typeof (*lock),
+ list);
+
+ GF_ASSERT (lock);
+
+ list_del_init (&lock->list);
+
+out:
+ return lock;
+}
+
+static int
+__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock)
+{
+ posix_lock_t *lock = NULL;
+ int ret = 0;
+
+ GF_ASSERT (fdctx);
+
+ lock = __get_next_fdctx_lock (fdctx);
+ if (!lock) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "marking EOL in reqlock");
+ pl_mark_eol_lock (reqlock);
+ goto out;
+ }
+
+ reqlock->user_flock = lock->user_flock;
+ reqlock->fl_start = lock->fl_start;
+ reqlock->fl_type = lock->fl_type;
+ reqlock->fl_end = lock->fl_end;
+ reqlock->owner = lock->owner;
+
+out:
+ if (lock)
+ __destroy_lock (lock);
+
+ return ret;
+}
+
+static int
+pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode,
+ fd_t *fd, posix_lock_t *reqlock)
+{
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+ int ret = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (!__fd_has_locks (pl_inode, fd)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fd=%p has no active locks", fd);
+ ret = 0;
+ goto unlock;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "There are active locks on fd");
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ fdctx = (pl_fdctx_t *)(long) tmp;
+
+ if (list_empty (&fdctx->locks_list)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no fdctx -> copying all locks on fd");
+
+ ret = __copy_locks_to_fdctx (pl_inode, fd, fdctx);
+ if (ret) {
+ goto unlock;
+ }
+
+ ret = __set_next_lock_fd (fdctx, reqlock);
+
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "fdctx present -> returning the next lock");
+ ret = __set_next_lock_fd (fdctx, reqlock);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not get next lock of fd");
+ goto unlock;
+ }
+ }
+ }
+
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return ret;
+
+}
int
pl_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct flock *flock)
-{
- transport_t *transport = NULL;
- pid_t client_pid = 0;
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- int op_ret = 0;
- int op_errno = 0;
- int can_block = 0;
- posix_lock_t *reqlock = NULL;
- posix_lock_t *conf = NULL;
- int ret = 0;
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- priv = this->private;
-
- pl_inode = pl_inode_get (this, fd->inode);
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- reqlock = new_posix_lock (flock, transport, client_pid);
- if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- switch (cmd) {
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ pl_inode_t *pl_inode = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+ int can_block = 0;
+ posix_lock_t *reqlock = NULL;
+ posix_lock_t *conf = NULL;
+ int ret = 0;
+
+ if ((flock->l_start < 0) || (flock->l_len < 0)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, fd);
+
+ if (!reqlock) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pl_trace_in (this, frame, fd, NULL, cmd, flock, NULL);
+
+ switch (cmd) {
+
+ case F_RESLK_LCKW:
+ can_block = 1;
+
+ /* fall through */
+ case F_RESLK_LCK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+ reqlock->frame = frame;
+ reqlock->this = this;
+
+ ret = pl_reserve_setlk (this, pl_inode, reqlock,
+ can_block);
+ if (ret < 0) {
+ if (can_block)
+ goto out;
+
+ op_ret = -1;
+ op_errno = -ret;
+ __destroy_lock (reqlock);
+ goto unwind;
+ }
+ /* Finally a getlk and return the call */
+ conf = pl_getlk (pl_inode, reqlock);
+ if (conf)
+ posix_lock_to_flock (conf, flock);
+ break;
+
+ case F_RESLK_UNLCK:
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_reserve_unlock (this, pl_inode, reqlock);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ __destroy_lock (reqlock);
+ goto unwind;
+
+ break;
+
+ case F_GETLK_FD:
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
+ GF_ASSERT (ret >= 0);
+
+ ret = pl_getlk_fd (this, pl_inode, fd, reqlock);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getting locks on fd failed");
+ op_ret = -1;
+ op_errno = ENOLCK;
+ goto unwind;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Replying with a lock on fd for healing");
+
+ posix_lock_to_flock (reqlock, flock);
+ __destroy_lock (reqlock);
+
+ break;
#if F_GETLK != F_GETLK64
- case F_GETLK64:
+ case F_GETLK64:
#endif
- case F_GETLK:
- conf = pl_getlk (pl_inode, reqlock, GF_LOCK_POSIX);
- posix_lock_to_flock (conf, flock);
- __destroy_lock (reqlock);
+ case F_GETLK:
+ conf = pl_getlk (pl_inode, reqlock);
+ posix_lock_to_flock (conf, flock);
+ __destroy_lock (reqlock);
- break;
+ break;
#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
+ case F_SETLKW64:
#endif
- case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
- reqlock->fd = fd;
+ case F_SETLKW:
+ can_block = 1;
+ reqlock->frame = frame;
+ reqlock->this = this;
- /* fall through */
+ /* fall through */
#if F_SETLK != F_SETLK64
- case F_SETLK64:
+ case F_SETLK64:
#endif
- case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_setlk (this, pl_inode, reqlock,
- can_block, GF_LOCK_POSIX);
-
- if (ret == -1) {
- if (can_block)
- goto out;
-
- gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
- op_ret = -1;
- op_errno = EAGAIN;
- __destroy_lock (reqlock);
- }
- }
+ case F_SETLK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+ ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock blocked due to conflicting reserve lock");
+ goto out;
+ }
+ ret = pl_setlk (this, pl_inode, reqlock,
+ can_block);
+
+ if (ret == -1) {
+ if ((can_block) && (F_UNLCK != flock->l_type)) {
+ pl_trace_block (this, frame, fd, NULL, cmd, flock, NULL);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
+ op_ret = -1;
+ op_errno = EAGAIN;
+ __destroy_lock (reqlock);
+
+ } else if ((0 == ret) && (F_UNLCK == flock->l_type)) {
+ /* For NLM's last "unlock on fd" detection */
+ if (pl_locks_by_fd (pl_inode, fd))
+ flock->l_type = F_RDLCK;
+ else
+ flock->l_type = F_UNLCK;
+ }
+ }
unwind:
- STACK_UNWIND (frame, op_ret, op_errno, flock);
+ pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
+ pl_update_refkeeper (this, fd->inode);
+
+
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata);
out:
- return 0;
+ return 0;
}
/* TODO: this function just logs, no action required?? */
int
pl_forget (xlator_t *this,
- inode_t *inode)
+ inode_t *inode)
{
- pl_inode_t *pl_inode = NULL;
-
+ pl_inode_t *pl_inode = NULL;
+
posix_lock_t *ext_tmp = NULL;
posix_lock_t *ext_l = NULL;
+ struct list_head posixlks_released;
- posix_lock_t *int_tmp = NULL;
- posix_lock_t *int_l = NULL;
+ pl_inode_lock_t *ino_tmp = NULL;
+ pl_inode_lock_t *ino_l = NULL;
+ struct list_head inodelks_released;
pl_rw_req_t *rw_tmp = NULL;
pl_rw_req_t *rw_req = NULL;
pl_entry_lock_t *entry_tmp = NULL;
pl_entry_lock_t *entry_l = NULL;
+ struct list_head entrylks_released;
+
+ pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom_tmp = NULL;
+
+ INIT_LIST_HEAD (&posixlks_released);
+ INIT_LIST_HEAD (&inodelks_released);
+ INIT_LIST_HEAD (&entrylks_released);
+
+ pl_inode = pl_inode_get (this, inode);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
- pl_inode = pl_inode_get (this, inode);
+ if (!list_empty (&pl_inode->rw_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending R/W requests found, releasing.");
- if (!list_empty (&pl_inode->rw_list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pending R/W requests found, releasing.");
-
- list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,
- list) {
-
- list_del (&rw_req->list);
- FREE (rw_req);
+ list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,
+ list) {
+
+ list_del (&rw_req->list);
+ GF_FREE (rw_req);
+ }
}
- }
- if (!list_empty (&pl_inode->ext_list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pending fcntl locks found, releasing.");
+ if (!list_empty (&pl_inode->ext_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending fcntl locks found, releasing.");
+
+ list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,
+ list) {
- list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,
- list) {
-
- __delete_lock (pl_inode, ext_l);
- __destroy_lock (ext_l);
+ __delete_lock (pl_inode, ext_l);
+ if (ext_l->blocked) {
+ list_add_tail (&ext_l->list, &posixlks_released);
+ continue;
+ }
+ __destroy_lock (ext_l);
+ }
}
- }
- if (!list_empty (&pl_inode->int_list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pending inode locks found, releasing.");
- list_for_each_entry_safe (int_l, int_tmp, &pl_inode->int_list,
- list) {
-
- __delete_lock (pl_inode, int_l);
- __destroy_lock (int_l);
+ list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) {
+
+ if (!list_empty (&dom->inodelk_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending inode locks found, releasing.");
+
+ list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) {
+ __delete_inode_lock (ino_l);
+ __pl_inodelk_unref (ino_l);
+ }
+
+ list_splice_init (&dom->blocked_inodelks, &inodelks_released);
+
+
+ }
+ if (!list_empty (&dom->entrylk_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending entry locks found, releasing.");
+
+ list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) {
+ list_del_init (&entry_l->domain_list);
+
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
+ GF_FREE (entry_l);
+ }
+
+ list_splice_init (&dom->blocked_entrylks, &entrylks_released);
+ }
+
+ list_del (&dom->inode_list);
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ " Cleaning up domain: %s", dom->domain);
+ GF_FREE ((char *)(dom->domain));
+ GF_FREE (dom);
}
- }
- if (!list_empty (&pl_inode->dir_list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pending entry locks found, releasing.");
-
- list_for_each_entry_safe (entry_l, entry_tmp,
- &pl_inode->dir_list, inode_list) {
-
- list_del (&entry_l->inode_list);
- FREE (entry_l);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) {
+
+ STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0,
+ &ext_l->user_flock, NULL);
+ __destroy_lock (ext_l);
+ }
+
+ list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) {
+
+ STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref (ino_l);
+ }
+
+ list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) {
+
+ STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
+ GF_FREE (entry_l);
+
+ }
+
+ GF_FREE (pl_inode);
+
+ return 0;
+}
+
+int
+pl_release (xlator_t *this, fd_t *fd)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = -1;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+
+ if (fd == NULL) {
+ goto out;
+ }
+
+ ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode);
+ if (ret != 0)
+ goto out;
+
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+
+ pl_trace_release (this, fd);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Releasing all locks with fd %p", fd);
+
+ delete_locks_of_fd (this, pl_inode, fd);
+ pl_update_refkeeper (this, fd->inode);
+
+ ret = fd_ctx_del (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not get fdctx");
+ goto out;
+ }
+
+ fdctx = (pl_fdctx_t *)(long)tmp;
+
+ GF_FREE (fdctx);
+out:
+ return ret;
+}
+
+int
+pl_releasedir (xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+
+ if (fd == NULL) {
+ goto out;
+ }
+
+ ret = fd_ctx_del (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not get fdctx");
+ goto out;
+ }
+
+ fdctx = (pl_fdctx_t *)(long)tmp;
+
+ GF_FREE (fdctx);
+out:
+ return ret;
+}
+
+int32_t
+__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
+{
+ posix_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry (lock, &pl_inode->ext_list, list) {
+
+ count++;
+ }
+
+ return count;
+}
+
+int32_t
+get_posixlk_count (xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
+
+ ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ count =__get_posixlk_count (this, pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+out:
+ return count;
+}
+
+void
+pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent,
+ char *basename, dict_t *dict)
+{
+ uint32_t entrylk = 0;
+ int ret = -1;
+
+ if (!parent || !basename || !strlen (basename))
+ goto out;
+ entrylk = check_entrylk_on_basename (this, parent, basename);
+out:
+ ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK);
+ }
+}
+
+void
+pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
+ dict_t *dict)
+{
+ int32_t count = 0;
+ int ret = -1;
+
+ count = get_entrylk_count (this, inode);
+ ret = dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, count);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " dict_set failed on key %s", GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+}
+
+void
+pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict,
+ gf_boolean_t per_dom)
+{
+ int32_t count = 0;
+ int ret = -1;
+ char *domname = NULL;
+
+
+ if (per_dom){
+ ret = dict_get_str (dict, GLUSTERFS_INODELK_DOM_COUNT,
+ &domname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "value for key %s",GLUSTERFS_INODELK_DOM_COUNT);
+ goto out;
}
- }
+ }
+
+ count = get_inodelk_count (this, inode, domname);
- FREE (pl_inode);
+ ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to set count for "
+ "key %s", GLUSTERFS_INODELK_COUNT);
+ }
- return 0;
+out:
+ return;
+}
+
+void
+pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode,
+ dict_t *dict)
+{
+ int32_t count = 0;
+ int ret = -1;
+
+ count = get_posixlk_count (this, inode);
+ ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, count);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " dict_set failed on key %s", GLUSTERFS_POSIXLK_COUNT);
+ }
+
+}
+
+int32_t
+pl_lookup_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ dict_t *xdata,
+ struct iatt *postparent)
+{
+ pl_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+
+ if (op_ret)
+ goto out;
+
+ local = frame->local;
+
+ if (local->parent_entrylk_req)
+ pl_parent_entrylk_xattr_fill (this, local->loc.parent,
+ (char*)local->loc.name, xdata);
+ if (local->entrylk_count_req)
+ pl_entrylk_xattr_fill (this, inode, xdata);
+ if (local->inodelk_count_req)
+ pl_inodelk_xattr_fill (this, inode, xdata, _gf_false);
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill (this, inode, xdata, _gf_true);
+ if (local->posixlk_count_req)
+ pl_posixlk_xattr_fill (this, inode, xdata);
+
+
+out:
+ local = frame->local;
+ frame->local = NULL;
+
+ if (local != NULL) {
+ loc_wipe (&local->loc);
+ mem_put (local);
+ }
+
+ STACK_UNWIND_STRICT (
+ lookup,
+ frame,
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+pl_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ if (xdata) {
+ if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT))
+ local->entrylk_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_INODELK_COUNT))
+ local->inodelk_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_INODELK_DOM_COUNT))
+ local->inodelk_dom_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT))
+ local->posixlk_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_PARENT_ENTRYLK))
+ local->parent_entrylk_req = 1;
+ }
+
+ frame->local = local;
+ loc_copy (&local->loc, loc);
+
+ STACK_WIND (frame,
+ pl_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc, xdata);
+ ret = 0;
+out:
+ if (ret == -1)
+ STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+int
+pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ gf_dirent_t *entry = NULL;
+
+ local = frame->local;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if (local->entrylk_count_req)
+ pl_entrylk_xattr_fill (this, entry->inode, entry->dict);
+ if (local->inodelk_count_req)
+ pl_inodelk_xattr_fill (this, entry->inode, entry->dict,
+ _gf_false);
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill (this, entry->inode, entry->dict,
+ _gf_true);
+ if (local->posixlk_count_req)
+ pl_posixlk_xattr_fill (this, entry->inode, entry->dict);
+ }
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ if (local)
+ mem_put (local);
+
+ return 0;
+}
+
+int
+pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ pl_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ if (dict) {
+ if (dict_get (dict, GLUSTERFS_ENTRYLK_COUNT))
+ local->entrylk_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_INODELK_COUNT))
+ local->inodelk_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_INODELK_DOM_COUNT))
+ local->inodelk_dom_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_POSIXLK_COUNT))
+ local->posixlk_count_req = 1;
+ }
+
+ frame->local = local;
+
+ STACK_WIND (frame, pl_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
}
void
-pl_dump_inode_priv (inode_t *inode)
+pl_dump_lock (char *str, int size, struct gf_flock *flock,
+ gf_lkowner_t *owner, void *trans, char *conn_id,
+ time_t *granted_time, time_t *blkd_time, gf_boolean_t active)
{
+ char *type_str = NULL;
+ char granted[32] = {0,};
+ char blocked[32] = {0,};
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
- int ret = -1;
- uint64_t tmp_pl_inode = 0;
- pl_inode_t *pl_inode = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
+ if (active) {
+ if (blkd_time && *blkd_time == 0) {
+ snprintf (str, size, RANGE_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (granted_time, granted));
+ } else {
+ snprintf (str, size, RANGE_BLKD_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (blkd_time, blocked),
+ ctime_r (granted_time, granted));
+ }
+ }
+ else {
+ snprintf (str, size, RANGE_BLKD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (blkd_time, blocked));
+ }
+
+}
+
+void
+__dump_entrylks (pl_inode_t *pl_inode)
+{
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *lock = NULL;
+ char blocked[32] = {0,};
+ char granted[32] = {0,};
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+ char tmp[256];
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+
+ count = 0;
+
+ gf_proc_dump_build_key(key,
+ "lock-dump.domain",
+ "domain");
+ gf_proc_dump_write(key, "%s", dom->domain);
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+
+ gf_proc_dump_build_key(key,
+ "xlator.feature.locks.lock-dump.domain.entrylk",
+ "entrylk[%d](ACTIVE)", count );
+ if (lock->blkd_time.tv_sec == 0 && lock->blkd_time.tv_usec == 0) {
+ snprintf (tmp, 256, ENTRY_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK", lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->granted_time.tv_sec, granted));
+ } else {
+ snprintf (tmp, 256, ENTRY_BLKD_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK", lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->blkd_time.tv_sec, blocked),
+ ctime_r (&lock->granted_time.tv_sec, granted));
+ }
+
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+
+ gf_proc_dump_build_key(key,
+ "xlator.feature.locks.lock-dump.domain.entrylk",
+ "entrylk[%d](BLOCKED)", count );
+ snprintf (tmp, 256, ENTRY_BLKD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK", lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->blkd_time.tv_sec, blocked));
+
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+
+ }
+
+}
+
+void
+dump_entrylks (pl_inode_t *pl_inode)
+{
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __dump_entrylks (pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+}
+
+void
+__dump_inodelks (pl_inode_t *pl_inode)
+{
+ pl_dom_list_t *dom = NULL;
+ pl_inode_lock_t *lock = NULL;
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+
+ char tmp[256];
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+
+ count = 0;
+
+ gf_proc_dump_build_key(key,
+ "lock-dump.domain",
+ "domain");
+ gf_proc_dump_write(key, "%s", dom->domain);
+
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+
+ gf_proc_dump_build_key(key,
+ "inodelk",
+ "inodelk[%d](ACTIVE)",count );
+
+ SET_FLOCK_PID (&lock->user_flock, lock);
+ pl_dump_lock (tmp, 256, &lock->user_flock,
+ &lock->owner,
+ lock->client, lock->connection_id,
+ &lock->granted_time.tv_sec,
+ &lock->blkd_time.tv_sec,
+ _gf_true);
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+
+ gf_proc_dump_build_key(key,
+ "inodelk",
+ "inodelk[%d](BLOCKED)",count );
+ SET_FLOCK_PID (&lock->user_flock, lock);
+ pl_dump_lock (tmp, 256, &lock->user_flock,
+ &lock->owner,
+ lock->client, lock->connection_id,
+ 0, &lock->blkd_time.tv_sec,
+ _gf_false);
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+
+ }
+
+}
+
+void
+dump_inodelks (pl_inode_t *pl_inode)
+{
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __dump_inodelks (pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+}
+
+void
+__dump_posixlks (pl_inode_t *pl_inode)
+{
+ posix_lock_t *lock = NULL;
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+
+ char tmp[256];
+
+ list_for_each_entry (lock, &pl_inode->ext_list, list) {
+
+ SET_FLOCK_PID (&lock->user_flock, lock);
+ gf_proc_dump_build_key(key,
+ "posixlk",
+ "posixlk[%d](%s)",
+ count,
+ lock->blocked ? "BLOCKED" : "ACTIVE");
+ pl_dump_lock (tmp, 256, &lock->user_flock,
+ &lock->owner, lock->client, NULL,
+ &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
+ (lock->blocked)? _gf_false: _gf_true);
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+}
+
+void
+dump_posixlks (pl_inode_t *pl_inode)
+{
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __dump_posixlks (pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+}
+
+int32_t
+pl_dump_inode_priv (xlator_t *this, inode_t *inode)
+{
+
+ int ret = -1;
+ uint64_t tmp_pl_inode = 0;
+ pl_inode_t *pl_inode = NULL;
+ char *pathname = NULL;
+ gf_boolean_t section_added = _gf_false;
- if (!inode)
- return;
+ int count = 0;
- ret = inode_ctx_get (inode, inode->table->xl, &tmp_pl_inode);
+ if (!inode) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = TRY_LOCK (&inode->lock);
+ if (ret)
+ goto out;
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret)
+ goto unlock;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+ if (ret)
+ goto out;
- if (ret != 0)
- return;
-
pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ if (!pl_inode) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name);
+ section_added = _gf_true;
+
+ /*We are safe to call __inode_path since we have the
+ * inode->table->lock */
+ __inode_path (inode, NULL, &pathname);
+ if (pathname)
+ gf_proc_dump_write ("path", "%s", pathname);
+
+ gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory);
+
+ ret = pthread_mutex_trylock (&pl_inode->mutex);
+ if (ret)
+ goto out;
+ {
+ count = __get_entrylk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("entrylk-count", "%d", count);
+ __dump_entrylks (pl_inode);
+ }
+
+ count = __get_inodelk_count (this, pl_inode, NULL);
+ if (count) {
+ gf_proc_dump_write("inodelk-count", "%d", count);
+ __dump_inodelks (pl_inode);
+ }
- if (!pl_inode)
- return;
+ count = __get_posixlk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("posixlk-count", "%d", count);
+ __dump_posixlks (pl_inode);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.inode",
- "%ld.%s",inode->ino, "mandatory");
- gf_proc_dump_write(key, "%d", pl_inode->mandatory);
+out:
+ GF_FREE (pathname);
+
+ if (ret && inode) {
+ if (!section_added)
+ gf_proc_dump_add_section ("xlator.features.locks.%s."
+ "inode", this->name);
+ gf_proc_dump_write ("Unable to print lock state", "(Lock "
+ "acquisition failure) %s",
+ uuid_utoa (inode->gfid));
+ }
+ return ret;
}
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+ if (!this)
+ return ret;
-/*
- * pl_dump_inode - inode dump function for posix locks
- *
- */
-int
-pl_dump_inode (xlator_t *this)
+ ret = xlator_mem_acct_init (this, gf_locks_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+
+pl_ctx_t*
+pl_ctx_get (client_t *client, xlator_t *xlator)
+{
+ void *tmp = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ client_ctx_get (client, xlator, &tmp);
+
+ ctx = tmp;
+
+ if (ctx != NULL)
+ goto out;
+
+ ctx = GF_CALLOC (1, sizeof (pl_ctx_t), gf_locks_mt_posix_lock_t);
+
+ if (ctx == NULL)
+ goto out;
+
+ ctx->ltable = pl_lock_table_new();
+
+ if (ctx->ltable == NULL) {
+ GF_FREE (ctx);
+ ctx = NULL;
+ goto out;
+ }
+
+ LOCK_INIT (&ctx->ltable_lock);
+
+ if (client_ctx_set (client, xlator, ctx) != 0) {
+ LOCK_DESTROY (&ctx->ltable_lock);
+ GF_FREE (ctx->ltable);
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+out:
+ return ctx;
+}
+
+static void
+ltable_delete_locks (struct _lock_table *ltable)
{
+ struct _locker *locker = NULL;
+ struct _locker *tmp = NULL;
+
+ list_for_each_entry_safe (locker, tmp, &ltable->inodelk_lockers, lockers) {
+ if (locker->fd)
+ pl_del_locker (ltable, locker->volume, &locker->loc,
+ locker->fd, &locker->owner,
+ GF_FOP_INODELK);
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
+ }
- assert(this);
-
- if (this->itable) {
- inode_table_dump(this->itable,
- "xlator.features.locks.inode_table",
- pl_dump_inode_priv);
+ list_for_each_entry_safe (locker, tmp, &ltable->entrylk_lockers, lockers) {
+ if (locker->fd)
+ pl_del_locker (ltable, locker->volume, &locker->loc,
+ locker->fd, &locker->owner,
+ GF_FOP_ENTRYLK);
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
}
+ GF_FREE (ltable);
+}
- return 0;
+
+static int32_t
+destroy_cbk (xlator_t *this, client_t *client)
+{
+ void *tmp = NULL;
+ pl_ctx_t *locks_ctx = NULL;
+
+ client_ctx_del (client, this, &tmp);
+
+ if (tmp == NULL)
+ return 0
+;
+ locks_ctx = tmp;
+ if (locks_ctx->ltable)
+ ltable_delete_locks (locks_ctx->ltable);
+
+ LOCK_DESTROY (&locks_ctx->ltable_lock);
+ GF_FREE (locks_ctx);
+
+ return 0;
}
+static int32_t
+disconnect_cbk (xlator_t *this, client_t *client)
+{
+ int32_t ret = 0;
+ pl_ctx_t *locks_ctx = NULL;
+ struct _lock_table *ltable = NULL;
+
+ locks_ctx = pl_ctx_get (client, this);
+ if (locks_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto out;
+ }
+
+ LOCK (&locks_ctx->ltable_lock);
+ {
+ if (locks_ctx->ltable) {
+ ltable = locks_ctx->ltable;
+ locks_ctx->ltable = pl_lock_table_new ();
+ }
+ }
+ UNLOCK (&locks_ctx->ltable_lock);
+
+ if (ltable)
+ ltable_delete_locks (ltable);
+
+out:
+ return ret;
+}
+
int
init (xlator_t *this)
{
- posix_locks_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- data_t *mandatory = NULL;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "FATAL: posix-locks should have exactly one child");
- return -1;
- }
+ posix_locks_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
+ data_t *mandatory = NULL;
+ data_t *trace = NULL;
+ int ret = -1;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "FATAL: posix-locks should have exactly one child");
+ goto out;
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. Please check the volume file.");
- }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling. Please check the volume file.");
+ }
- trav = this->children;
- while (trav->xlator->children)
- trav = trav->xlator->children;
+ trav = this->children;
+ while (trav->xlator->children)
+ trav = trav->xlator->children;
- if (strncmp ("storage/", trav->xlator->type, 8)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "'locks' translator is not loaded over a storage "
+ if (strncmp ("storage/", trav->xlator->type, 8)) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "'locks' translator is not loaded over a storage "
"translator");
- return -1;
- }
+ goto out;
+ }
- priv = CALLOC (1, sizeof (*priv));
+ priv = GF_CALLOC (1, sizeof (*priv),
+ gf_locks_mt_posix_locks_private_t);
+
+ mandatory = dict_get (this->options, "mandatory-locks");
+ if (mandatory)
+ gf_log (this->name, GF_LOG_WARNING,
+ "mandatory locks not supported in this minor release.");
+
+ trace = dict_get (this->options, "trace");
+ if (trace) {
+ if (gf_string2boolean (trace->data,
+ &priv->trace) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'trace' takes on only boolean values.");
+ goto out;
+ }
+ }
- mandatory = dict_get (this->options, "mandatory-locks");
- if (mandatory) {
- if (gf_string2boolean (mandatory->data,
- &priv->mandatory) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'mandatory-locks' takes on only boolean "
- "values.");
- return -1;
- }
- }
+ this->local_pool = mem_pool_new (pl_local_t, 32);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
- this->private = priv;
- return 0;
+ this->private = priv;
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE (priv);
+ }
+ return ret;
}
int
fini (xlator_t *this)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = NULL;
- priv = this->private;
- free (priv);
+ priv = this->private;
+ if (!priv)
+ return 0;
+ this->private = NULL;
+ GF_FREE (priv->brickname);
+ GF_FREE (priv);
- return 0;
+ return 0;
}
int
-pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock);
+pl_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
-pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock);
+pl_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
-pl_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+pl_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int
-pl_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+pl_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
struct xlator_fops fops = {
- .create = pl_create,
- .truncate = pl_truncate,
- .ftruncate = pl_ftruncate,
- .open = pl_open,
- .readv = pl_readv,
- .writev = pl_writev,
- .lk = pl_lk,
- .inodelk = pl_inodelk,
- .finodelk = pl_finodelk,
- .entrylk = pl_entrylk,
- .fentrylk = pl_fentrylk,
- .flush = pl_flush,
-};
-
-
-struct xlator_mops mops = {
+ .lookup = pl_lookup,
+ .create = pl_create,
+ .truncate = pl_truncate,
+ .ftruncate = pl_ftruncate,
+ .open = pl_open,
+ .readv = pl_readv,
+ .writev = pl_writev,
+ .lk = pl_lk,
+ .inodelk = pl_inodelk,
+ .finodelk = pl_finodelk,
+ .entrylk = pl_entrylk,
+ .fentrylk = pl_fentrylk,
+ .flush = pl_flush,
+ .opendir = pl_opendir,
+ .readdirp = pl_readdirp,
+ .getxattr = pl_getxattr,
+ .fgetxattr = pl_fgetxattr,
+ .fsetxattr = pl_fsetxattr,
};
struct xlator_dumpops dumpops = {
- .inode = pl_dump_inode,
+ .inodectx = pl_dump_inode_priv,
};
struct xlator_cbks cbks = {
- .forget = pl_forget,
+ .forget = pl_forget,
+ .release = pl_release,
+ .releasedir = pl_releasedir,
+ .client_destroy = destroy_cbk,
+ .client_disconnect = disconnect_cbk,
};
struct volume_options options[] = {
- { .key = { "mandatory-locks", "mandatory" },
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} },
+ { .key = { "mandatory-locks", "mandatory" },
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = { "trace" },
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
new file mode 100644
index 000000000..11abd26d8
--- /dev/null
+++ b/xlators/features/locks/src/reservelk.c
@@ -0,0 +1,443 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+void
+__delete_reserve_lock (posix_lock_t *lock)
+{
+ list_del (&lock->list);
+}
+
+void
+__destroy_reserve_lock (posix_lock_t *lock)
+{
+ GF_FREE (lock);
+}
+
+/* Return true if the two reservelks have exactly same lock boundaries */
+int
+reservelks_equal (posix_lock_t *l1, posix_lock_t *l2)
+{
+ if ((l1->fl_start == l2->fl_start) &&
+ (l1->fl_end == l2->fl_end))
+ return 1;
+
+ return 0;
+}
+
+/* Determine if lock is grantable or not */
+static posix_lock_t *
+__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ xlator_t *this = NULL;
+ posix_lock_t *l = NULL;
+ posix_lock_t *ret_lock = NULL;
+
+ this = THIS;
+
+ if (list_empty (&pl_inode->reservelk_list)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No reservelks in list");
+ goto out;
+ }
+ list_for_each_entry (l, &pl_inode->reservelk_list, list){
+ if (reservelks_equal (lock, l)) {
+ ret_lock = l;
+ break;
+ }
+ }
+out:
+ return ret_lock;
+}
+
+static inline int
+__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2)
+{
+ return (is_same_lkowner (&l1->owner, &l2->owner));
+
+}
+
+static posix_lock_t *
+__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *l = NULL;
+
+ if (list_empty (&pl_inode->reservelk_list)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ "reservelk list empty");
+ return NULL;
+ }
+
+ list_for_each_entry (l, &pl_inode->reservelk_list, list) {
+ if (reservelks_equal (l, lock)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ "equal reservelk found");
+ break;
+ }
+ }
+
+ return l;
+}
+
+static int
+__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock)
+{
+ posix_lock_t *conf = NULL;
+ int ret = 0;
+
+ conf = __matching_reservelk (pl_inode, lock);
+ if (conf) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Matching reservelk found");
+ if (__same_owner_reservelk (lock, conf)) {
+ list_del_init (&conf->list);
+ gf_log (this->name, GF_LOG_TRACE,
+ "Removing the matching reservelk for setlk to progress");
+ GF_FREE (conf);
+ ret = 0;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Conflicting reservelk found");
+ ret = 1;
+ }
+
+ }
+ return ret;
+
+}
+
+int
+pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock, int can_block)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (__reservelk_conflict (this, pl_inode, lock)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Found conflicting reservelk. Blocking until reservelk is unlocked.");
+ lock->blocked = can_block;
+ list_add_tail (&lock->list, &pl_inode->blocked_calls);
+ ret = -1;
+ goto unlock;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "no conflicting reservelk found. Call continuing");
+ ret = 0;
+
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ return ret;
+
+}
+
+
+/* Determines if lock can be granted and adds the lock. If the lock
+ * is blocking, adds it to the blocked_reservelks.
+ */
+static int
+__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ posix_lock_t *conf = NULL;
+ int ret = -EINVAL;
+
+ conf = __reservelk_grantable (pl_inode, lock);
+ if (conf){
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
+
+ list_add_tail (&lock->list, &pl_inode->blocked_reservelks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+
+ goto out;
+ }
+
+ list_add (&lock->list, &pl_inode->reservelk_list);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static posix_lock_t *
+find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode)
+{
+ posix_lock_t *l = NULL;
+ list_for_each_entry (l, &pl_inode->reservelk_list, list) {
+ if (reservelks_equal (l, lock))
+ return l;
+ }
+ return NULL;
+}
+
+/* Set F_UNLCK removes a lock which has the exact same lock boundaries
+ * as the UNLCK lock specifies. If such a lock is not found, returns invalid
+ */
+static posix_lock_t *
+__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
+{
+
+ posix_lock_t *conf = NULL;
+
+ conf = find_matching_reservelk (lock, pl_inode);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " Matching lock not found for unlock");
+ goto out;
+ }
+ __delete_reserve_lock (conf);
+ gf_log (this->name, GF_LOG_DEBUG,
+ " Matching lock found for unlock");
+
+out:
+ return conf;
+
+
+}
+
+static void
+__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+
+ list_del_init (&bl->list);
+
+ bl_ret = __lock_reservelk (this, pl_inode, bl, 1);
+
+ if (bl_ret == 0) {
+ list_add (&bl->list, granted);
+ }
+ }
+ return;
+}
+
+/* Grant all reservelks blocked on lock(s) */
+void
+grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode)
+{
+ struct list_head granted;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
+
+ INIT_LIST_HEAD (&granted);
+
+ if (list_empty (&pl_inode->blocked_reservelks)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No blocked locks to be granted");
+ return;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_reserve_locks (this, pl_inode, &granted);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, list) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock,
+ NULL);
+ }
+
+}
+
+static void
+__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+
+ list_del_init (&bl->list);
+
+ bl_ret = pl_verify_reservelk (this, pl_inode, bl, bl->blocked);
+
+ if (bl_ret == 0) {
+ list_add_tail (&bl->list, granted);
+ }
+ }
+ return;
+}
+
+void
+grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode)
+{
+ struct list_head granted;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
+ fd_t *fd = NULL;
+
+ int can_block = 0;
+ int32_t cmd = 0;
+ int ret = 0;
+
+ if (list_empty (&pl_inode->blocked_calls)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No blocked lock calls to be granted");
+ return;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_lock_calls (this, pl_inode, &granted);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, list) {
+ fd = fd_from_fdnum (lock);
+
+ if (lock->blocked) {
+ can_block = 1;
+ cmd = F_SETLKW;
+ }
+ else
+ cmd = F_SETLK;
+
+ lock->blocked = 0;
+ ret = pl_setlk (this, pl_inode, lock, can_block);
+ if (ret == -1) {
+ if (can_block) {
+ pl_trace_block (this, lock->frame, fd, NULL,
+ cmd, &lock->user_flock, NULL);
+ continue;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
+ pl_trace_out (this, lock->frame, fd, NULL, cmd,
+ &lock->user_flock, -1, EAGAIN, NULL);
+ pl_update_refkeeper (this, fd->inode);
+ STACK_UNWIND_STRICT (lk, lock->frame, -1,
+ EAGAIN, &lock->user_flock,
+ NULL);
+ __destroy_lock (lock);
+ }
+ }
+
+ }
+
+}
+
+
+int
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *retlock = NULL;
+ int ret = -1;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ retlock = __reserve_unlock_lock (this, lock, pl_inode);
+ if (!retlock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Reservelk Unlock successful");
+ __destroy_reserve_lock (retlock);
+ ret = 0;
+ }
+out:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ grant_blocked_reserve_locks (this, pl_inode);
+ grant_blocked_lock_calls (this, pl_inode);
+
+ return ret;
+
+}
+
+int
+pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ int ret = -EINVAL;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ ret = __lock_reservelk (this, pl_inode, lock, can_block);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ else
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->fl_start,
+ lock->fl_end);
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return ret;
+}