diff options
Diffstat (limited to 'xlators/features/locks')
| -rw-r--r-- | xlators/features/locks/src/Makefile.am | 17 | ||||
| -rw-r--r-- | xlators/features/locks/src/clear.c | 424 | ||||
| -rw-r--r-- | xlators/features/locks/src/clear.h | 76 | ||||
| -rw-r--r-- | xlators/features/locks/src/common.c | 1414 | ||||
| -rw-r--r-- | xlators/features/locks/src/common.h | 179 | ||||
| -rw-r--r-- | xlators/features/locks/src/entrylk.c | 848 | ||||
| -rw-r--r-- | xlators/features/locks/src/inodelk.c | 825 | ||||
| -rw-r--r-- | xlators/features/locks/src/internal.c | 896 | ||||
| -rw-r--r-- | xlators/features/locks/src/locks-mem-types.h | 29 | ||||
| -rw-r--r-- | xlators/features/locks/src/locks.h | 195 | ||||
| -rw-r--r-- | xlators/features/locks/src/posix.c | 3051 | ||||
| -rw-r--r-- | xlators/features/locks/src/reservelk.c | 443 | ||||
| -rw-r--r-- | xlators/features/locks/tests/unit-test.c | 22 |
13 files changed, 6438 insertions, 1981 deletions
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am index ec4a953eb..0f79731b4 100644 --- a/xlators/features/locks/src/Makefile.am +++ b/xlators/features/locks/src/Makefile.am @@ -1,15 +1,18 @@ xlator_LTLIBRARIES = locks.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -locks_la_LDFLAGS = -module -avoidversion +locks_la_LDFLAGS = -module -avoid-version -locks_la_SOURCES = common.c posix.c internal.c -locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \ + clear.c +locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = locks.h common.h +noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + + +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) CLEANFILES = @@ -17,4 +20,4 @@ uninstall-local: rm -f $(DESTDIR)$(xlatordir)/posix-locks.so install-data-hook: - ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
\ No newline at end of file + ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c new file mode 100644 index 000000000..124b9ad0f --- /dev/null +++ b/xlators/features/locks/src/clear.c @@ -0,0 +1,424 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" + +#include "locks.h" +#include "common.h" +#include "statedump.h" +#include "clear.h" + +int +clrlk_get_kind (char *kind) +{ + char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted", + "all"}; + int ret_kind = CLRLK_KIND_MAX; + int i = 0; + + for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) { + if (!strcmp (clrlk_kinds[i], kind)) { + ret_kind = i; + break; + } + } + + return ret_kind; +} + +int +clrlk_get_type (char *type) +{ + char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"}; + int ret_type = CLRLK_TYPE_MAX; + int i = 0; + + for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) { + if (!strcmp (clrlk_types[i], type)) { + ret_type = i; + break; + } + } + + return ret_type; +} + +int +clrlk_get_lock_range (char *range_str, struct gf_flock *ulock, + gf_boolean_t *chk_range) +{ + int ret = -1; + + if (!chk_range) + goto out; + + if (!range_str) { + ret = 0; + *chk_range = _gf_false; + goto out; + } + + if (sscanf (range_str, "%hd,%"PRId64"-""%"PRId64, &ulock->l_whence, + &ulock->l_start, &ulock->l_len) != 3) { + goto out; + } + + ret = 0; + *chk_range = _gf_true; +out: + return ret; +} + +int +clrlk_parse_args (const char* cmd, clrlk_args *args) +{ + char *opts = NULL; + char *cur = NULL; + char *tok = NULL; + char *sptr = NULL; + char *free_ptr = NULL; + char kw[KW_MAX] = {[KW_TYPE] = 't', + [KW_KIND] = 'k', + }; + int ret = -1; + int i = 0; + + GF_ASSERT (cmd); + free_ptr = opts = GF_CALLOC (1, strlen (cmd), gf_common_mt_char); + if (!opts) + goto out; + + if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1) { + ret = -1; + goto out; + } + + /*clr_lk_prefix.ttype.kkind.args, args - type specific*/ + cur = opts; + for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr)); + cur = NULL, i++) { + if (tok[0] != kw[i]) { + ret = -1; + goto out; + } + if (i == KW_TYPE) + args->type = clrlk_get_type (tok+1); + if (i == KW_KIND) + args->kind = clrlk_get_kind (tok+1); + } + + if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX)) + goto out; + + /*optional args, neither range nor basename can 'legally' contain + * "/" in them*/ + tok = strtok_r (NULL, "/", &sptr); + if (tok) + args->opts = gf_strdup (tok); + + ret = 0; +out: + GF_FREE (free_ptr); + return ret; +} + +int +clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + int *blkd, int *granted, int *op_errno) +{ + posix_lock_t *plock = NULL; + posix_lock_t *tmp = NULL; + struct gf_flock ulock = {0, }; + int ret = -1; + int bcount = 0; + int gcount = 0; + gf_boolean_t chk_range = _gf_false; + + if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) { + *op_errno = EINVAL; + goto out; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list, + list) { + if ((plock->blocked && + !(args->kind & CLRLK_BLOCKED)) || + (!plock->blocked && + !(args->kind & CLRLK_GRANTED))) + continue; + + if (chk_range && + (plock->user_flock.l_whence != ulock.l_whence + || plock->user_flock.l_start != ulock.l_start + || plock->user_flock.l_len != ulock.l_len)) + continue; + + list_del_init (&plock->list); + if (plock->blocked) { + bcount++; + pl_trace_out (this, plock->frame, NULL, NULL, + F_SETLKW, &plock->user_flock, + -1, EAGAIN, NULL); + + STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN, + &plock->user_flock, NULL); + + } else { + gcount++; + } + GF_FREE (plock); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + grant_blocked_locks (this, pl_inode); + ret = 0; +out: + *blkd = bcount; + *granted = gcount; + return ret; +} + +/* Returns 0 on success and -1 on failure */ +int +clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno) +{ + pl_inode_lock_t *ilock = NULL; + pl_inode_lock_t *tmp = NULL; + struct gf_flock ulock = {0, }; + int ret = -1; + int bcount = 0; + int gcount = 0; + gf_boolean_t chk_range = _gf_false; + struct list_head released; + + INIT_LIST_HEAD (&released); + if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) { + *op_errno = EINVAL; + goto out; + } + + if (args->kind & CLRLK_BLOCKED) + goto blkd; + + if (args->kind & CLRLK_GRANTED) + goto granted; + +blkd: + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks, + blocked_locks) { + if (chk_range && + (ilock->user_flock.l_whence != ulock.l_whence + || ilock->user_flock.l_start != ulock.l_start + || ilock->user_flock.l_len != ulock.l_len)) + continue; + + bcount++; + list_del_init (&ilock->blocked_locks); + list_add (&ilock->blocked_locks, &released); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) { + list_del_init (&ilock->blocked_locks); + pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW, + &ilock->user_flock, -1, EAGAIN, + ilock->volume); + STACK_UNWIND_STRICT (inodelk, ilock->frame, -1, + EAGAIN, NULL); + //No need to take lock as the locks are only in one list + __pl_inodelk_unref (ilock); + } + + if (!(args->kind & CLRLK_GRANTED)) { + ret = 0; + goto out; + } + +granted: + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list, + list) { + if (chk_range && + (ilock->user_flock.l_whence != ulock.l_whence + || ilock->user_flock.l_start != ulock.l_start + || ilock->user_flock.l_len != ulock.l_len)) + continue; + + gcount++; + list_del_init (&ilock->list); + list_add (&ilock->list, &released); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (ilock, tmp, &released, list) { + list_del_init (&ilock->list); + //No need to take lock as the locks are only in one list + __pl_inodelk_unref (ilock); + } + + ret = 0; +out: + grant_blocked_inode_locks (this, pl_inode, dom); + *blkd = bcount; + *granted = gcount; + return ret; +} + +/* Returns 0 on success and -1 on failure */ +int +clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno) +{ + pl_entry_lock_t *elock = NULL; + pl_entry_lock_t *tmp = NULL; + int bcount = 0; + int gcount = 0; + int ret = -1; + struct list_head removed; + struct list_head released; + + INIT_LIST_HEAD (&released); + if (args->kind & CLRLK_BLOCKED) + goto blkd; + + if (args->kind & CLRLK_GRANTED) + goto granted; + +blkd: + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks, + blocked_locks) { + if (args->opts) { + if (!elock->basename || + strcmp (elock->basename, args->opts)) + continue; + } + + bcount++; + + list_del_init (&elock->blocked_locks); + list_add_tail (&elock->blocked_locks, &released); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (elock, tmp, &released, blocked_locks) { + list_del_init (&elock->blocked_locks); + entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL, + elock->basename, ENTRYLK_LOCK, elock->type, + -1, EAGAIN); + STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL); + GF_FREE ((char *) elock->basename); + GF_FREE (elock->connection_id); + GF_FREE (elock); + } + + if (!(args->kind & CLRLK_GRANTED)) { + ret = 0; + goto out; + } + +granted: + INIT_LIST_HEAD (&removed); + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (elock, tmp, &dom->entrylk_list, + domain_list) { + if (args->opts) { + if (!elock->basename || + strcmp (elock->basename, args->opts)) + continue; + } + + gcount++; + list_del_init (&elock->domain_list); + list_add_tail (&elock->domain_list, &removed); + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (elock, tmp, &removed, domain_list) { + grant_blocked_entry_locks (this, pl_inode, elock, dom); + } + + ret = 0; +out: + *blkd = bcount; + *granted = gcount; + return ret; +} + +int +clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode, + clrlk_args *args, int *blkd, int *granted, + int *op_errno) +{ + pl_dom_list_t *dom = NULL; + int ret = -1; + int tmp_bcount = 0; + int tmp_gcount = 0; + + if (list_empty (&pl_inode->dom_list)) { + ret = 0; + goto out; + } + + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + tmp_bcount = tmp_gcount = 0; + + switch (args->type) + { + case CLRLK_INODE: + ret = clrlk_clear_inodelk (this, pl_inode, dom, args, + &tmp_bcount, &tmp_gcount, + op_errno); + if (ret) + goto out; + break; + case CLRLK_ENTRY: + ret = clrlk_clear_entrylk (this, pl_inode, dom, args, + &tmp_bcount, &tmp_gcount, + op_errno); + if (ret) + goto out; + break; + } + + *blkd += tmp_bcount; + *granted += tmp_gcount; + } + + ret = 0; +out: + return ret; +} diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h new file mode 100644 index 000000000..511f3f74a --- /dev/null +++ b/xlators/features/locks/src/clear.h @@ -0,0 +1,76 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef __CLEAR_H__ +#define __CLEAR_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "compat-errno.h" +#include "stack.h" +#include "call-stub.h" +#include "locks.h" + +typedef enum { + CLRLK_INODE, + CLRLK_ENTRY, + CLRLK_POSIX, + CLRLK_TYPE_MAX +} clrlk_type; + +typedef enum { + CLRLK_BLOCKED = 1, + CLRLK_GRANTED, + CLRLK_ALL, + CLRLK_KIND_MAX +} clrlk_kind; + +typedef enum { + KW_TYPE, + KW_KIND, + /*add new keywords here*/ + KW_MAX +} clrlk_opts; + +struct _clrlk_args; +typedef struct _clrlk_args clrlk_args; + +struct _clrlk_args { + int type; + int kind; + char *opts; +}; + +int +clrlk_get__kind (char *kind); +int +clrlk_get_type (char *type); +int +clrlk_get_lock_range (char *range_str, struct gf_flock *ulock, + gf_boolean_t *chk_range); +int +clrlk_parse_args (const char* cmd, clrlk_args *args); + +int +clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + int *blkd, int *granted, int *op_errno); +int +clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno); +int +clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno); +int +clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode, + clrlk_args *args, int *blkd, int *granted, + int *op_errno); +#endif /* __CLEAR_H__ */ diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 1f10aa20c..b3309580d 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #include <unistd.h> #include <fcntl.h> #include <limits.h> @@ -35,83 +25,477 @@ #include "common-utils.h" #include "locks.h" +#include "common.h" static int -__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom); +__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock); static void -__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom); +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock); +static int +pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock); + +static pl_dom_list_t * +__allocate_domain (const char *volume) +{ + pl_dom_list_t *dom = NULL; -#define DOMAIN_HEAD(pl_inode, dom) (dom == GF_LOCK_POSIX \ - ? &pl_inode->ext_list \ - : &pl_inode->int_list) + dom = GF_CALLOC (1, sizeof (*dom), + gf_locks_mt_pl_dom_list_t); + if (!dom) + goto out; -pl_inode_t * -pl_inode_get (xlator_t *this, inode_t *inode) + dom->domain = gf_strdup(volume); + if (!dom->domain) + goto out; + + gf_log ("posix-locks", GF_LOG_TRACE, + "New domain allocated: %s", dom->domain); + + INIT_LIST_HEAD (&dom->inode_list); + INIT_LIST_HEAD (&dom->entrylk_list); + INIT_LIST_HEAD (&dom->blocked_entrylks); + INIT_LIST_HEAD (&dom->inodelk_list); + INIT_LIST_HEAD (&dom->blocked_inodelks); + +out: + if (dom && (NULL == dom->domain)) { + GF_FREE (dom); + dom = NULL; + } + + return dom; +} + +/* Returns domain for the lock. If domain is not present, + * allocates a domain and returns it + */ +pl_dom_list_t * +get_domain (pl_inode_t *pl_inode, const char *volume) { - uint64_t tmp_pl_inode = 0; - pl_inode_t *pl_inode = NULL; - mode_t st_mode = 0; - int ret = 0; + pl_dom_list_t *dom = NULL; + + GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out); + GF_VALIDATE_OR_GOTO ("posix-locks", volume, out); - ret = inode_ctx_get (inode, this,&tmp_pl_inode); - if (ret == 0) { - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - goto out; + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + if (strcmp (dom->domain, volume) == 0) + goto unlock; + } + + dom = __allocate_domain (volume); + if (dom) + list_add (&dom->inode_list, &pl_inode->dom_list); + } +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + if (dom) { + gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume); + } else { + gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume); } - pl_inode = CALLOC (1, sizeof (*pl_inode)); - if (!pl_inode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } +out: + return dom; +} - st_mode = inode->st_mode; - if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP)) - pl_inode->mandatory = 1; +unsigned long +fd_to_fdnum (fd_t *fd) +{ + return ((unsigned long) fd); +} +fd_t * +fd_from_fdnum (posix_lock_t *lock) +{ + return ((fd_t *) lock->fd_num); +} - pthread_mutex_init (&pl_inode->mutex, NULL); +int +__pl_inode_is_empty (pl_inode_t *pl_inode) +{ + pl_dom_list_t *dom = NULL; + int is_empty = 1; - INIT_LIST_HEAD (&pl_inode->dir_list); - INIT_LIST_HEAD (&pl_inode->ext_list); - INIT_LIST_HEAD (&pl_inode->int_list); - INIT_LIST_HEAD (&pl_inode->rw_list); + if (!list_empty (&pl_inode->ext_list)) + is_empty = 0; - ret = inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + if (!list_empty (&dom->entrylk_list)) + is_empty = 0; -out: - return pl_inode; + if (!list_empty (&dom->inodelk_list)) + is_empty = 0; + } + + return is_empty; +} + +void +pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame) +{ + snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu", + (unsigned long long) frame->root->pid, + lkowner_utoa (&frame->root->lk_owner), + frame->root->client, + (unsigned long long) frame->root->unique); +} + + +void +pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc) +{ + inode_t *inode = NULL; + char *ipath = NULL; + int ret = 0; + + if (fd) + inode = fd->inode; + if (loc) + inode = loc->inode; + + if (!inode) { + snprintf (str, size, "<nul>"); + return; + } + + if (loc && loc->path) { + ipath = gf_strdup (loc->path); + } else { + ret = inode_path (inode, NULL, &ipath); + if (ret <= 0) + ipath = NULL; + } + + snprintf (str, size, "gfid=%s, fd=%p, path=%s", + uuid_utoa (inode->gfid), fd, + ipath ? ipath : "<nul>"); + + GF_FREE (ipath); +} + + +void +pl_print_lock (char *str, int size, int cmd, + struct gf_flock *flock, gf_lkowner_t *owner) +{ + char *cmd_str = NULL; + char *type_str = NULL; + + switch (cmd) { +#if F_GETLK != F_GETLK64 + case F_GETLK64: +#endif + case F_GETLK: + cmd_str = "GETLK"; + break; + +#if F_SETLK != F_SETLK64 + case F_SETLK64: +#endif + case F_SETLK: + cmd_str = "SETLK"; + break; + +#if F_SETLKW != F_SETLKW64 + case F_SETLKW64: +#endif + case F_SETLKW: + cmd_str = "SETLKW"; + break; + + default: + cmd_str = "UNKNOWN"; + break; + } + + switch (flock->l_type) { + case F_RDLCK: + type_str = "READ"; + break; + case F_WRLCK: + type_str = "WRITE"; + break; + case F_UNLCK: + type_str = "UNLOCK"; + break; + default: + type_str = "UNKNOWN"; + break; + } + + snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, " + "start=%llu, len=%llu, pid=%llu, lk-owner=%s", + cmd_str, type_str, (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid, + lkowner_utoa (owner)); +} + + +void +pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain) +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk (pl_lock, 256, cmd, flock, domain); + else + pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + + gf_log (this->name, GF_LOG_INFO, + "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", + pl_locker, pl_lockee, pl_lock); +} + + +void +pl_print_verdict (char *str, int size, int op_ret, int op_errno) +{ + char *verdict = NULL; + + if (op_ret == 0) { + verdict = "GRANTED"; + } else { + switch (op_errno) { + case EAGAIN: + verdict = "TRYAGAIN"; + break; + default: + verdict = strerror (op_errno); + } + } + + snprintf (str, size, "%s", verdict); +} + + +void +pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain) + +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; + char verdict[32]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk (pl_lock, 256, cmd, flock, domain); + else + pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + + pl_print_verdict (verdict, 32, op_ret, op_errno); + + gf_log (this->name, GF_LOG_INFO, + "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", + verdict, pl_locker, pl_lockee, pl_lock); +} + + +void +pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain) + +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk (pl_lock, 256, cmd, flock, domain); + else + pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + + gf_log (this->name, GF_LOG_INFO, + "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", + pl_locker, pl_lockee, pl_lock); +} + + +void +pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd) +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + pl_inode_t *pl_inode = NULL; + + priv = this->private; + + if (!priv->trace) + return; + + pl_inode = pl_inode_get (this, fd->inode); + + if (pl_inode && __pl_inode_is_empty (pl_inode)) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, NULL); + + gf_log (this->name, GF_LOG_INFO, + "[FLUSH] Locker = {%s} Lockee = {%s}", + pl_locker, pl_lockee); +} + +void +pl_trace_release (xlator_t *this, fd_t *fd) +{ + posix_locks_private_t *priv = NULL; + char pl_lockee[256]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_lockee (pl_lockee, 256, fd, NULL); + + gf_log (this->name, GF_LOG_INFO, + "[RELEASE] Lockee = {%s}", pl_lockee); +} + + +void +pl_update_refkeeper (xlator_t *this, inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + int is_empty = 0; + int need_unref = 0; + int need_ref = 0; + + pl_inode = pl_inode_get (this, inode); + + pthread_mutex_lock (&pl_inode->mutex); + { + is_empty = __pl_inode_is_empty (pl_inode); + + if (is_empty && pl_inode->refkeeper) { + need_unref = 1; + pl_inode->refkeeper = NULL; + } + + if (!is_empty && !pl_inode->refkeeper) { + need_ref = 1; + pl_inode->refkeeper = inode; + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + if (need_unref) + inode_unref (inode); + + if (need_ref) + inode_ref (inode); +} + + +pl_inode_t * +pl_inode_get (xlator_t *this, inode_t *inode) +{ + uint64_t tmp_pl_inode = 0; + pl_inode_t *pl_inode = NULL; + int ret = 0; + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret == 0) { + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + goto unlock; + } + pl_inode = GF_CALLOC (1, sizeof (*pl_inode), + gf_locks_mt_pl_inode_t); + if (!pl_inode) { + goto unlock; + } + + gf_log (this->name, GF_LOG_TRACE, + "Allocating new pl inode"); + + pthread_mutex_init (&pl_inode->mutex, NULL); + + INIT_LIST_HEAD (&pl_inode->dom_list); + INIT_LIST_HEAD (&pl_inode->ext_list); + INIT_LIST_HEAD (&pl_inode->rw_list); + INIT_LIST_HEAD (&pl_inode->reservelk_list); + INIT_LIST_HEAD (&pl_inode->blocked_reservelks); + INIT_LIST_HEAD (&pl_inode->blocked_calls); + + __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); + } +unlock: + UNLOCK (&inode->lock); + + return pl_inode; } /* Create a new posix_lock_t */ posix_lock_t * -new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid) +new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, + gf_lkowner_t *owner, fd_t *fd) { - posix_lock_t *lock = NULL; + posix_lock_t *lock = NULL; - lock = CALLOC (1, sizeof (posix_lock_t)); - if (!lock) { - return NULL; - } + GF_VALIDATE_OR_GOTO ("posix-locks", flock, out); + GF_VALIDATE_OR_GOTO ("posix-locks", client, out); + GF_VALIDATE_OR_GOTO ("posix-locks", fd, out); - lock->fl_start = flock->l_start; - lock->fl_type = flock->l_type; + lock = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!lock) { + goto out; + } - if (flock->l_len == 0) - lock->fl_end = LLONG_MAX; - else - lock->fl_end = flock->l_start + flock->l_len - 1; + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; - lock->transport = transport; - lock->client_pid = client_pid; + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; - INIT_LIST_HEAD (&lock->list); + lock->client = client; + lock->fd_num = fd_to_fdnum (fd); + lock->fd = fd; + lock->client_pid = client_pid; + lock->owner = *owner; - return lock; + INIT_LIST_HEAD (&lock->list); + +out: + return lock; } @@ -119,7 +503,7 @@ new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid) void __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) { - list_del_init (&lock->list); + list_del_init (&lock->list); } @@ -127,32 +511,37 @@ __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) void __destroy_lock (posix_lock_t *lock) { - free (lock); + GF_FREE (lock); } -/* Convert a posix_lock to a struct flock */ +/* Convert a posix_lock to a struct gf_flock */ void -posix_lock_to_flock (posix_lock_t *lock, struct flock *flock) +posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock) { - flock->l_pid = lock->client_pid; - flock->l_type = lock->fl_type; - flock->l_start = lock->fl_start; - - if (lock->fl_end == LLONG_MAX) - flock->l_len = 0; - else - flock->l_len = lock->fl_end - lock->fl_start + 1; + flock->l_pid = lock->client_pid; + flock->l_type = lock->fl_type; + flock->l_start = lock->fl_start; + flock->l_owner = lock->owner; + + if (lock->fl_end == LLONG_MAX) + flock->l_len = 0; + else + flock->l_len = lock->fl_end - lock->fl_start + 1; } - /* Insert the lock into the inode's lock list */ static void -__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock) { - list_add_tail (&lock->list, DOMAIN_HEAD (pl_inode, dom)); + if (lock->blocked) + gettimeofday (&lock->blkd_time, NULL); + else + gettimeofday (&lock->granted_time, NULL); - return; + list_add_tail (&lock->list, &pl_inode->ext_list); + + return; } @@ -160,14 +549,14 @@ __insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) int locks_overlap (posix_lock_t *l1, posix_lock_t *l2) { - /* - Note: - FUSE always gives us absolute offsets, so no need to worry - about SEEK_CUR or SEEK_END - */ - - return ((l1->fl_end >= l2->fl_start) && - (l2->fl_end >= l1->fl_start)); + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ + + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); } @@ -175,24 +564,26 @@ locks_overlap (posix_lock_t *l1, posix_lock_t *l2) int same_owner (posix_lock_t *l1, posix_lock_t *l2) { - return ((l1->client_pid == l2->client_pid) && - (l1->transport == l2->transport)); + + return (is_same_lkowner (&l1->owner, &l2->owner) && + (l1->client == l2->client)); + } /* Delete all F_UNLCK locks */ void -__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom) +__delete_unlck_locks (pl_inode_t *pl_inode) { - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; - - list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) { - if (l->fl_type == F_UNLCK) { - __delete_lock (pl_inode, l); - __destroy_lock (l); - } - } + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if (l->fl_type == F_UNLCK) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } } @@ -200,130 +591,202 @@ __delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom) static posix_lock_t * add_locks (posix_lock_t *l1, posix_lock_t *l2) { - posix_lock_t *sum = NULL; + posix_lock_t *sum = NULL; - sum = CALLOC (1, sizeof (posix_lock_t)); - if (!sum) - return NULL; + sum = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!sum) + return NULL; - sum->fl_start = min (l1->fl_start, l2->fl_start); - sum->fl_end = max (l1->fl_end, l2->fl_end); + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); - return sum; + return sum; } /* Subtract two locks */ struct _values { - posix_lock_t *locks[3]; + posix_lock_t *locks[3]; }; /* {big} must always be contained inside {small} */ static struct _values subtract_locks (posix_lock_t *big, posix_lock_t *small) { - struct _values v = { .locks = {0, 0, 0} }; - - if ((big->fl_start == small->fl_start) && - (big->fl_end == small->fl_end)) { - /* both edges coincide with big */ - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_type = small->fl_type; - } - else if ((small->fl_start > big->fl_start) && - (small->fl_end < big->fl_end)) { - /* both edges lie inside big */ - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[1]); - v.locks[2] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[2]); - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - memcpy (v.locks[2], big, sizeof (posix_lock_t)); - v.locks[2]->fl_start = small->fl_end + 1; - } - /* one edge coincides with big */ - else if (small->fl_start == big->fl_start) { - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[1]); - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_start = small->fl_end + 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - } - else if (small->fl_end == big->fl_end) { - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[1]); - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - } - else { - gf_log ("posix-locks", GF_LOG_ERROR, - "Unexpected case in subtract_locks. Please send " - "a bug report to gluster-devel@nongnu.org"); - } - - return v; -} - -/* - Start searching from {begin}, and return the first lock that - conflicts, NULL if no conflict - If {begin} is NULL, then start from the beginning of the list + + struct _values v = { .locks = {0, 0, 0} }; + + if ((big->fl_start == small->fl_start) && + (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_type = small->fl_type; + goto done; + } + + if ((small->fl_start > big->fl_start) && + (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + + v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + + memcpy (v.locks[2], big, sizeof (posix_lock_t)); + v.locks[2]->fl_start = small->fl_end + 1; + goto done; + + } + + /* one edge coincides with big */ + if (small->fl_start == big->fl_start) { + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + + v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_start = small->fl_end + 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + goto done; + } + + if (small->fl_end == big->fl_end) { + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + + v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + goto done; + } + + GF_ASSERT (0); + gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks"); + +out: + if (v.locks[0]) { + GF_FREE (v.locks[0]); + v.locks[0] = NULL; + } + if (v.locks[1]) { + GF_FREE (v.locks[1]); + v.locks[1] = NULL; + } + if (v.locks[2]) { + GF_FREE (v.locks[2]); + v.locks[2] = NULL; + } + +done: + return v; +} + +static posix_lock_t * +first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *l = NULL; + posix_lock_t *conf = NULL; + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (l->blocked) + continue; + + if (locks_overlap (l, lock)) { + if (same_owner (l, lock)) + continue; + + if ((l->fl_type == F_WRLCK) || + (lock->fl_type == F_WRLCK)) { + conf = l; + goto unlock; + } + } + } + } +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + return conf; +} + +/* + Start searching from {begin}, and return the first lock that + conflicts, NULL if no conflict + If {begin} is NULL, then start from the beginning of the list */ static posix_lock_t * -first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom) +first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; + posix_lock_t *l = NULL; - list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) { - if (l->blocked) - continue; + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (l->blocked) + continue; - if (locks_overlap (l, lock)) - return l; - } + if (locks_overlap (l, lock)) + return l; + } - return NULL; + return NULL; } /* Return true if lock is grantable */ static int -__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom) +__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; - int ret = 1; - - list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) { - if (!l->blocked && locks_overlap (lock, l)) { - if (((l->fl_type == F_WRLCK) - || (lock->fl_type == F_WRLCK)) - && (lock->fl_type != F_UNLCK) - && !same_owner (l, lock)) { - ret = 0; - break; - } - } - } - return ret; + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (!l->blocked && locks_overlap (lock, l)) { + if (((l->fl_type == F_WRLCK) + || (lock->fl_type == F_WRLCK)) + && (lock->fl_type != F_UNLCK) + && !same_owner (l, lock)) { + ret = 0; + break; + } + } + } + return ret; } @@ -331,231 +794,428 @@ extern void do_blocked_rw (pl_inode_t *); static void -__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom) +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *conf = NULL; - posix_lock_t *t = NULL; - posix_lock_t *sum = NULL; - int i = 0; - struct _values v = { .locks = {0, 0, 0} }; + posix_lock_t *conf = NULL; + posix_lock_t *t = NULL; + posix_lock_t *sum = NULL; + int i = 0; + struct _values v = { .locks = {0, 0, 0} }; + + list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) { + if (conf->blocked) + continue; + if (!locks_overlap (conf, lock)) + continue; + + if (same_owner (conf, lock)) { + if (conf->fl_type == lock->fl_type) { + sum = add_locks (lock, conf); + + sum->fl_type = lock->fl_type; + sum->client = lock->client; + sum->fd_num = lock->fd_num; + sum->client_pid = lock->client_pid; + sum->owner = lock->owner; + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __destroy_lock (lock); + INIT_LIST_HEAD (&sum->list); + posix_lock_to_flock (sum, &sum->user_flock); + __insert_and_merge (pl_inode, sum); + + return; + } else { + sum = add_locks (lock, conf); + + sum->fl_type = conf->fl_type; + sum->client = conf->client; + sum->fd_num = conf->fd_num; + sum->client_pid = conf->client_pid; + sum->owner = conf->owner; + + v = subtract_locks (sum, lock); + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __delete_lock (pl_inode, lock); + __destroy_lock (lock); + + __destroy_lock (sum); + + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; + + INIT_LIST_HEAD (&v.locks[i]->list); + posix_lock_to_flock (v.locks[i], + &v.locks[i]->user_flock); + __insert_and_merge (pl_inode, + v.locks[i]); + } + + __delete_unlck_locks (pl_inode); + return; + } + } + + if (lock->fl_type == F_UNLCK) { + continue; + } + + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + __insert_lock (pl_inode, lock); + return; + } + } - list_for_each_entry_safe (conf, t, DOMAIN_HEAD (pl_inode, dom), list) { - if (!locks_overlap (conf, lock)) - continue; + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + __insert_lock (pl_inode, lock); + } else { + __destroy_lock (lock); + } +} - if (same_owner (conf, lock)) { - if (conf->fl_type == lock->fl_type) { - sum = add_locks (lock, conf); - sum->fl_type = lock->fl_type; - sum->transport = lock->transport; - sum->client_pid = lock->client_pid; +void +__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted) +{ + struct list_head tmp_list; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + posix_lock_t *conf = NULL; + + INIT_LIST_HEAD (&tmp_list); + + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if (l->blocked) { + conf = first_overlap (pl_inode, l); + if (conf) + continue; + + l->blocked = 0; + list_move_tail (&l->list, &tmp_list); + } + } - __delete_lock (pl_inode, conf); - __destroy_lock (conf); + list_for_each_entry_safe (l, tmp, &tmp_list, list) { + list_del_init (&l->list); - __destroy_lock (lock); - __insert_and_merge (pl_inode, sum, dom); + if (__is_lock_grantable (pl_inode, l)) { + conf = GF_CALLOC (1, sizeof (*conf), + gf_locks_mt_posix_lock_t); - return; - } else { - sum = add_locks (lock, conf); + if (!conf) { + l->blocked = 1; + __insert_lock (pl_inode, l); + continue; + } - sum->fl_type = conf->fl_type; - sum->transport = conf->transport; - sum->client_pid = conf->client_pid; + conf->frame = l->frame; + l->frame = NULL; - v = subtract_locks (sum, lock); - - __delete_lock (pl_inode, conf); - __destroy_lock (conf); + posix_lock_to_flock (l, &conf->user_flock); - __delete_lock (pl_inode, lock); - __destroy_lock (lock); + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", + l->client_pid, lkowner_utoa (&l->owner), + l->user_flock.l_start, + l->user_flock.l_len); - __destroy_lock (sum); + __insert_and_merge (pl_inode, l); - for (i = 0; i < 3; i++) { - if (!v.locks[i]) - continue; + list_add (&conf->list, granted); + } else { + l->blocked = 1; + __insert_lock (pl_inode, l); + } + } +} - if (v.locks[i]->fl_type == F_UNLCK) { - __destroy_lock (v.locks[i]); - continue; - } - __insert_and_merge (pl_inode, - v.locks[i], dom); - } - __delete_unlck_locks (pl_inode, dom); - return; - } - } +void +grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode) +{ + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; - if (lock->fl_type == F_UNLCK) { - continue; - } + INIT_LIST_HEAD (&granted_list); - if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { - __insert_lock (pl_inode, lock, dom); - return; - } - } + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_locks (this, pl_inode, &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); - /* no conflicts, so just insert */ - if (lock->fl_type != F_UNLCK) { - __insert_lock (pl_inode, lock, dom); - } else { - __destroy_lock (lock); - } -} + list_for_each_entry_safe (lock, tmp, &granted_list, list) { + list_del_init (&lock->list); + pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, 0, 0, NULL); -void -__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, - gf_lk_domain_t dom, struct list_head *granted) + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, + &lock->user_flock, NULL); + + GF_FREE (lock); + } + + return; +} + +static int +pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock) { - struct list_head tmp_list; - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; - posix_lock_t *conf = NULL; + struct gf_flock flock = {0,}; + posix_lock_t *unlock_lock = NULL; - INIT_LIST_HEAD (&tmp_list); + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; - list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) { - if (l->blocked) { - conf = first_overlap (pl_inode, l, dom); - if (conf) - continue; + int ret = -1; - l->blocked = 0; - list_move_tail (&l->list, &tmp_list); - } - } + INIT_LIST_HEAD (&granted_list); - list_for_each_entry_safe (l, tmp, &tmp_list, list) { - list_del_init (&l->list); + flock.l_type = F_UNLCK; + flock.l_whence = old_lock->user_flock.l_whence; + flock.l_start = old_lock->user_flock.l_start; + flock.l_len = old_lock->user_flock.l_len; - if (__is_lock_grantable (pl_inode, l, dom)) { - conf = CALLOC (1, sizeof (*conf)); - if (!conf) { - l->blocked = 1; - __insert_lock (pl_inode, l, dom); - continue; - } + unlock_lock = new_posix_lock (&flock, old_lock->client, + old_lock->client_pid, &old_lock->owner, + old_lock->fd); + GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out); + ret = 0; - conf->frame = l->frame; - l->frame = NULL; + __insert_and_merge (pl_inode, unlock_lock); - posix_lock_to_flock (l, &conf->user_flock); + __grant_blocked_locks (this, pl_inode, &granted_list); - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) %"PRId64" - %"PRId64" => Granted", - l->fl_type == F_UNLCK ? "Unlock" : "Lock", - l->client_pid, - l->user_flock.l_start, - l->user_flock.l_len); + list_for_each_entry_safe (lock, tmp, &granted_list, list) { + list_del_init (&lock->list); - __insert_and_merge (pl_inode, l, dom); + pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, 0, 0, NULL); - list_add (&conf->list, granted); - } else { - l->blocked = 1; - __insert_lock (pl_inode, l, dom); - } - } + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, + &lock->user_flock, NULL); + + GF_FREE (lock); + } + +out: + return ret; } +int +pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + int ret = 0; + + errno = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + /* Send unlock before the actual lock to + prevent lock upgrade / downgrade + problems only if: + - it is a blocking call + - it has other conflicting locks + */ + + if (can_block && + !(__is_lock_grantable (pl_inode, lock))) { + ret = pl_send_prelock_unlock (this, pl_inode, + lock); + if (ret) + gf_log (this->name, GF_LOG_DEBUG, + "Could not send pre-lock " + "unlock"); + } + + if (__is_lock_grantable (pl_inode, lock)) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + __insert_and_merge (pl_inode, lock); + } else if (can_block) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + lock->blocked = 1; + __insert_lock (pl_inode, lock); + ret = -1; + } else { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + errno = EAGAIN; + ret = -1; + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_locks (this, pl_inode); -void -grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom) + do_blocked_rw (pl_inode); + + return ret; +} + + +posix_lock_t * +pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock) { - struct list_head granted_list; - posix_lock_t *tmp = NULL; - posix_lock_t *lock = NULL; + posix_lock_t *conf = NULL; - INIT_LIST_HEAD (&granted_list); + conf = first_conflicting_overlap (pl_inode, lock); - pthread_mutex_lock (&pl_inode->mutex); - { - __grant_blocked_locks (this, pl_inode, dom, &granted_list); - } - pthread_mutex_unlock (&pl_inode->mutex); + if (conf == NULL) { + lock->fl_type = F_UNLCK; + return lock; + } - list_for_each_entry_safe (lock, tmp, &granted_list, list) { - list_del_init (&lock->list); + return conf; +} - STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock); - FREE (lock); - } +struct _lock_table * +pl_lock_table_new (void) +{ + struct _lock_table *new = NULL; - return; + new = GF_CALLOC (1, sizeof (struct _lock_table), gf_common_mt_lock_table); + if (new == NULL) { + goto out; + } + INIT_LIST_HEAD (&new->entrylk_lockers); + INIT_LIST_HEAD (&new->inodelk_lockers); + LOCK_INIT (&new->lock); +out: + return new; } int -pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block, gf_lk_domain_t dom) -{ - int ret = 0; - - errno = 0; - - pthread_mutex_lock (&pl_inode->mutex); - { - if (__is_lock_grantable (pl_inode, lock, dom)) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) %"PRId64" - %"PRId64" => OK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->user_flock.l_start, - lock->user_flock.l_len); - __insert_and_merge (pl_inode, lock, dom); - } else if (can_block) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->user_flock.l_start, - lock->user_flock.l_len); - lock->blocked = 1; - __insert_lock (pl_inode, lock, dom); - ret = -1; - } else { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) %"PRId64" - %"PRId64" => NOK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->user_flock.l_start, - lock->user_flock.l_len); - errno = EAGAIN; - ret = -1; - } - } - pthread_mutex_unlock (&pl_inode->mutex); - - grant_blocked_locks (this, pl_inode, dom); - - do_blocked_rw (pl_inode); - - return ret; -} +pl_add_locker (struct _lock_table *table, const char *volume, + loc_t *loc, fd_t *fd, pid_t pid, gf_lkowner_t *owner, + glusterfs_fop_t type) +{ + int32_t ret = -1; + struct _locker *new = NULL; + GF_VALIDATE_OR_GOTO ("lock-table", table, out); + GF_VALIDATE_OR_GOTO ("lock-table", volume, out); -posix_lock_t * -pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) + new = GF_CALLOC (1, sizeof (struct _locker), gf_common_mt_locker); + if (new == NULL) { + goto out; + } + INIT_LIST_HEAD (&new->lockers); + + new->volume = gf_strdup (volume); + + if (fd == NULL) { + loc_copy (&new->loc, loc); + } else { + new->fd = fd_ref (fd); + } + + new->pid = pid; + new->owner = *owner; + + LOCK (&table->lock); + { + if (type == GF_FOP_ENTRYLK) + list_add_tail (&new->lockers, &table->entrylk_lockers); + else + list_add_tail (&new->lockers, &table->inodelk_lockers); + } + UNLOCK (&table->lock); +out: + return ret; +} + +int +pl_del_locker (struct _lock_table *table, const char *volume, + loc_t *loc, fd_t *fd, gf_lkowner_t *owner, glusterfs_fop_t type) { - posix_lock_t *conf = NULL; + struct _locker *locker = NULL; + struct _locker *tmp = NULL; + int32_t ret = -1; + struct list_head *head = NULL; + struct list_head del; + + GF_VALIDATE_OR_GOTO ("lock-table", table, out); + GF_VALIDATE_OR_GOTO ("lock-table", volume, out); + + INIT_LIST_HEAD (&del); + + LOCK (&table->lock); + { + if (type == GF_FOP_ENTRYLK) { + head = &table->entrylk_lockers; + } else { + head = &table->inodelk_lockers; + } + + list_for_each_entry_safe (locker, tmp, head, lockers) { + if (!is_same_lkowner (&locker->owner, owner) || + strcmp (locker->volume, volume)) + continue; + + /* + * It is possible for inodelk lock to come on anon-fd + * and inodelk unlock to come on normal fd in case of + * client re-opens. So don't check for fds to be equal. + */ + if (locker->fd && fd) + list_move_tail (&locker->lockers, &del); + else if (locker->loc.inode && loc && + (locker->loc.inode == loc->inode)) + list_move_tail (&locker->lockers, &del); + } + } + UNLOCK (&table->lock); - conf = first_overlap (pl_inode, lock, dom); + tmp = NULL; + locker = NULL; - if (conf == NULL) { - lock->fl_type = F_UNLCK; - return lock; - } + list_for_each_entry_safe (locker, tmp, &del, lockers) { + list_del_init (&locker->lockers); + if (locker->fd) + fd_unref (locker->fd); + else + loc_wipe (&locker->loc); + + GF_FREE (locker->volume); + GF_FREE (locker); + } + + ret = 0; +out: + return ret; - return conf; } + diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index ee17b0087..db19ec978 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -1,43 +1,71 @@ /* - Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef __COMMON_H__ #define __COMMON_H__ +#include "lkowner.h" +/*dump locks format strings */ +#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu" +#define ENTRY_FMT "type=%s on basename=%s" +#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p" +#define GRNTD_AT "granted at %s" +#define BLKD_AT "blocked at %s" +#define CONN_ID "connection-id=%s" +#define DUMP_BLKD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT +#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "GRNTD_AT +#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT", "GRNTD_AT + +#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT +#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT +#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT + +#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT +#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT +#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT + +#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid) + +struct _locker { + struct list_head lockers; + char *volume; + loc_t loc; + fd_t *fd; + gf_lkowner_t owner; + pid_t pid; +}; + +struct _lock_table { + struct list_head inodelk_lockers; + struct list_head entrylk_lockers; + gf_lock_t lock; +}; + posix_lock_t * -new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid); +new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, + gf_lkowner_t *owner, fd_t *fd); pl_inode_t * pl_inode_get (xlator_t *this, inode_t *inode); posix_lock_t * -pl_getlk (pl_inode_t *inode, posix_lock_t *lock, gf_lk_domain_t domain); +pl_getlk (pl_inode_t *inode, posix_lock_t *lock); int pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, - int can_block, gf_lk_domain_t domain); + int can_block); void -grant_blocked_locks (xlator_t *this, pl_inode_t *inode, gf_lk_domain_t domain); +grant_blocked_locks (xlator_t *this, pl_inode_t *inode); void -posix_lock_to_flock (posix_lock_t *lock, struct flock *flock); +posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock); int locks_overlap (posix_lock_t *l1, posix_lock_t *l2); @@ -49,4 +77,111 @@ void __delete_lock (pl_inode_t *, posix_lock_t *); void __destroy_lock (posix_lock_t *); +pl_dom_list_t * +get_domain (pl_inode_t *pl_inode, const char *volume); + +void +grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom); + +void +__delete_inode_lock (pl_inode_lock_t *lock); + +void +__pl_inodelk_unref (pl_inode_lock_t *lock); + +void +grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_entry_lock_t *unlocked, pl_dom_list_t *dom); + +void pl_update_refkeeper (xlator_t *this, inode_t *inode); + +int32_t +__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname); +int32_t +get_inodelk_count (xlator_t *this, inode_t *inode, char *domname); + +int32_t +__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode); +int32_t +get_entrylk_count (xlator_t *this, inode_t *inode); + +void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain); + +void pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain); + +void pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain); + +void pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd); + +void entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +void entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, + int op_ret, int op_errno); + +void entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +void +pl_print_verdict (char *str, int size, int op_ret, int op_errno); + +void +pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc); + +void +pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame); + +void +pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain); + +void +pl_trace_release (xlator_t *this, fd_t *fd); + +unsigned long +fd_to_fdnum (fd_t *fd); + +fd_t * +fd_from_fdnum (posix_lock_t *lock); + +int +pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block); +int +reservelks_equal (posix_lock_t *l1, posix_lock_t *l2); + +int +pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock, int can_block); +int +pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock); + +uint32_t +check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename); + +int32_t +pl_add_locker (struct _lock_table *table, const char *volume, + loc_t *loc, + fd_t *fd, + pid_t pid, + gf_lkowner_t *owner, + glusterfs_fop_t type); + +int32_t +pl_del_locker (struct _lock_table *table, const char *volume, + loc_t *loc, + fd_t *fd, + gf_lkowner_t *owner, + glusterfs_fop_t type); + +struct _lock_table * +pl_lock_table_new (void); + #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c new file mode 100644 index 000000000..0785dc547 --- /dev/null +++ b/xlators/features/locks/src/entrylk.c @@ -0,0 +1,848 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +static pl_entry_lock_t * +new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, + client_t *client, pid_t client_pid, gf_lkowner_t *owner, + const char *volume) + +{ + pl_entry_lock_t *newlock = NULL; + + newlock = GF_CALLOC (1, sizeof (pl_entry_lock_t), + gf_locks_mt_pl_entry_lock_t); + if (!newlock) { + goto out; + } + + newlock->basename = basename ? gf_strdup (basename) : NULL; + newlock->type = type; + newlock->trans = client; + newlock->volume = volume; + newlock->client_pid = client_pid; + newlock->owner = *owner; + + INIT_LIST_HEAD (&newlock->domain_list); + INIT_LIST_HEAD (&newlock->blocked_locks); + +out: + return newlock; +} + + +/** + * all_names - does a basename represent all names? + * @basename: name to check + */ + +#define all_names(basename) ((basename == NULL) ? 1 : 0) + +/** + * names_conflict - do two names conflict? + * @n1: name + * @n2: name + */ + +static int +names_conflict (const char *n1, const char *n2) +{ + return all_names (n1) || all_names (n2) || !strcmp (n1, n2); +} + + +static inline int +__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2) +{ + + return (is_same_lkowner (&l1->owner, &l2->owner) && + (l1->trans == l2->trans)); +} + + +/** + * lock_grantable - is this lock grantable? + * @inode: inode in which to look + * @basename: name we're trying to lock + * @type: type of lock + */ +static pl_entry_lock_t * +__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + + if (list_empty (&dom->entrylk_list)) + return NULL; + + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (names_conflict (lock->basename, basename)) + return lock; + } + + return NULL; +} + +static pl_entry_lock_t * +__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + + if (list_empty (&dom->blocked_entrylks)) + return NULL; + + list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + if (names_conflict (lock->basename, basename)) + return lock; + } + + return NULL; +} + +static int +__owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock) +{ + pl_entry_lock_t *lock = NULL; + + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (__same_entrylk_owner (lock, newlock)) + return 1; + } + + list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + if (__same_entrylk_owner (lock, newlock)) + return 1; + } + + return 0; +} + +static int +names_equal (const char *n1, const char *n2) +{ + return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); +} + +void +pl_print_entrylk (char *str, int size, entrylk_cmd cmd, entrylk_type type, + const char *basename, const char *domain) +{ + char *cmd_str = NULL; + char *type_str = NULL; + + switch (cmd) { + case ENTRYLK_LOCK: + cmd_str = "LOCK"; + break; + + case ENTRYLK_LOCK_NB: + cmd_str = "LOCK_NB"; + break; + + case ENTRYLK_UNLOCK: + cmd_str = "UNLOCK"; + break; + + default: + cmd_str = "UNKNOWN"; + break; + } + + switch (type) { + case ENTRYLK_RDLCK: + type_str = "READ"; + break; + case ENTRYLK_WRLCK: + type_str = "WRITE"; + break; + default: + type_str = "UNKNOWN"; + break; + } + + snprintf (str, size, "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s", + cmd_str, type_str, basename, domain); +} + + +void +entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *domain, + fd_t *fd, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_entrylk[256]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain); + + gf_log (this->name, GF_LOG_INFO, + "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", + pl_locker, pl_lockee, pl_entrylk); +} + + +void +entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *domain, + fd_t *fd, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, int op_ret, int op_errno) +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_entrylk[256]; + char verdict[32]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain); + pl_print_verdict (verdict, 32, op_ret, op_errno); + + gf_log (this->name, GF_LOG_INFO, + "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", + verdict, pl_locker, pl_lockee, pl_entrylk); +} + + +void +entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) + +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_entrylk[256]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, volume); + + gf_log (this->name, GF_LOG_INFO, + "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", + pl_locker, pl_lockee, pl_entrylk); +} + +/** + * __find_most_matching_lock - find the lock struct which most matches in order of: + * lock on the exact basename || + * an all_names lock + * + * + * @inode: inode in which to look + * @basename: name to search for + */ + +static pl_entry_lock_t * +__find_most_matching_lock (pl_dom_list_t *dom, const char *basename) +{ + pl_entry_lock_t *lock; + pl_entry_lock_t *all = NULL; + pl_entry_lock_t *exact = NULL; + + if (list_empty (&dom->entrylk_list)) + return NULL; + + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (all_names (lock->basename)) + all = lock; + else if (names_equal (lock->basename, basename)) + exact = lock; + } + + return (exact ? exact : all); +} + +/** + * __lock_name - lock a name in a directory + * @inode: inode for the directory in which to lock + * @basename: name of the entry to lock + * if null, lock the entire directory + * + * the entire directory being locked is represented as: a single + * pl_entry_lock_t present in the entrylk_locks list with its + * basename = NULL + */ + +int +__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, + call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, + int nonblock, char *conn_id) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *conf = NULL; + int ret = -EINVAL; + + lock = new_entrylk_lock (pinode, basename, type, + frame->root->client, frame->root->pid, + &frame->root->lk_owner, dom->domain); + if (!lock) { + ret = -ENOMEM; + goto out; + } + + lock->frame = frame; + lock->this = this; + lock->trans = frame->root->client; + + if (conn_id) { + lock->connection_id = gf_strdup (conn_id); + } + + conf = __lock_grantable (dom, basename, type); + if (conf) { + ret = -EAGAIN; + if (nonblock){ + GF_FREE (lock->connection_id); + GF_FREE ((char *)lock->basename); + GF_FREE (lock); + goto out; + + } + + gettimeofday (&lock->blkd_time, NULL); + list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); + + gf_log (this->name, GF_LOG_TRACE, + "Blocking lock: {pinode=%p, basename=%s}", + pinode, basename); + + goto out; + } + + if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) { + ret = -EAGAIN; + if (nonblock) { + GF_FREE (lock->connection_id); + GF_FREE ((char *) lock->basename); + GF_FREE (lock); + goto out; + + } + lock->frame = frame; + lock->this = this; + + gettimeofday (&lock->blkd_time, NULL); + list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); + + gf_log (this->name, GF_LOG_TRACE, + "Lock is grantable, but blocking to prevent starvation"); + gf_log (this->name, GF_LOG_TRACE, + "Blocking lock: {pinode=%p, basename=%s}", + pinode, basename); + + ret = -EAGAIN; + goto out; + } + switch (type) { + + case ENTRYLK_WRLCK: + gettimeofday (&lock->granted_time, NULL); + list_add_tail (&lock->domain_list, &dom->entrylk_list); + break; + + default: + + gf_log (this->name, GF_LOG_DEBUG, + "Invalid type for entrylk specified: %d", type); + ret = -EINVAL; + goto out; + } + + ret = 0; +out: + return ret; +} + +/** + * __unlock_name - unlock a name in a directory + * @inode: inode for the directory to unlock in + * @basename: name of the entry to unlock + * if null, unlock the entire directory + */ + +pl_entry_lock_t * +__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *ret_lock = NULL; + + lock = __find_most_matching_lock (dom, basename); + + if (!lock) { + gf_log ("locks", GF_LOG_DEBUG, + "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found", + basename); + goto out; + } + + if (names_equal (lock->basename, basename) + && lock->type == type) { + + if (type == ENTRYLK_WRLCK) { + list_del_init (&lock->domain_list); + ret_lock = lock; + } + } else { + gf_log ("locks", GF_LOG_DEBUG, + "Unlock for a non-existing lock!"); + goto out; + } + +out: + return ret_lock; +} + +uint32_t +check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename) +{ + uint32_t entrylk = 0; + pl_inode_t *pinode = 0; + pl_dom_list_t *dom = NULL; + pl_entry_lock_t *conf = NULL; + + pinode = pl_inode_get (this, parent); + if (!pinode) + goto out; + pthread_mutex_lock (&pinode->mutex); + { + list_for_each_entry (dom, &pinode->dom_list, inode_list) { + conf = __lock_grantable (dom, basename, ENTRYLK_WRLCK); + if (conf && conf->basename) { + entrylk = 1; + break; + } + } + } + pthread_mutex_unlock (&pinode->mutex); + +out: + return entrylk; +} + +void +__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct list_head *granted) +{ + int bl_ret = 0; + pl_entry_lock_t *bl = NULL; + pl_entry_lock_t *tmp = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + list_splice_init (&dom->blocked_entrylks, &blocked_list); + + list_for_each_entry_safe (bl, tmp, &blocked_list, + blocked_locks) { + + list_del_init (&bl->blocked_locks); + + + gf_log ("locks", GF_LOG_TRACE, + "Trying to unblock: {pinode=%p, basename=%s}", + pl_inode, bl->basename); + + bl_ret = __lock_name (pl_inode, bl->basename, bl->type, + bl->frame, dom, bl->this, 0, + bl->connection_id); + + if (bl_ret == 0) { + list_add (&bl->blocked_locks, granted); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "should never happen"); + GF_FREE (bl->connection_id); + GF_FREE ((char *)bl->basename); + GF_FREE (bl); + } + } + return; +} + +/* Grants locks if possible which are blocked on a lock */ +void +grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_entry_lock_t *unlocked, pl_dom_list_t *dom) +{ + struct list_head granted_list; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lock = NULL; + + INIT_LIST_HEAD (&granted_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_entry_locks (this, pl_inode, dom, + &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { + list_del_init (&lock->blocked_locks); + + entrylk_trace_out (this, lock->frame, NULL, NULL, NULL, + lock->basename, ENTRYLK_LOCK, lock->type, + 0, 0); + + STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL); + + GF_FREE (lock->connection_id); + GF_FREE ((char *)lock->basename); + GF_FREE (lock); + } + + GF_FREE ((char *)unlocked->basename); + GF_FREE (unlocked->connection_id); + GF_FREE (unlocked); + + return; +} + +/** + * release_entry_locks_for_client: release all entry locks from this + * client for this loc_t + */ + +static int +release_entry_locks_for_client (xlator_t *this, pl_inode_t *pinode, + pl_dom_list_t *dom, client_t *client) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *tmp = NULL; + struct list_head granted; + struct list_head released; + + INIT_LIST_HEAD (&granted); + INIT_LIST_HEAD (&released); + + pthread_mutex_lock (&pinode->mutex); + { + list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks, + blocked_locks) { + if (lock->trans != client) + continue; + + list_del_init (&lock->blocked_locks); + + gf_log (this->name, GF_LOG_TRACE, + "releasing lock on held by " + "{client=%p}", client); + + list_add (&lock->blocked_locks, &released); + + } + + list_for_each_entry_safe (lock, tmp, &dom->entrylk_list, + domain_list) { + if (lock->trans != client) + continue; + + list_del_init (&lock->domain_list); + + gf_log (this->name, GF_LOG_TRACE, + "releasing lock on held by " + "{client=%p}", client); + + GF_FREE ((char *)lock->basename); + GF_FREE (lock->connection_id); + GF_FREE (lock); + } + + __grant_blocked_entry_locks (this, pinode, dom, &granted); + + } + + pthread_mutex_unlock (&pinode->mutex); + + list_for_each_entry_safe (lock, tmp, &released, blocked_locks) { + list_del_init (&lock->blocked_locks); + + STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN, NULL); + + GF_FREE ((char *)lock->basename); + GF_FREE (lock->connection_id); + GF_FREE (lock); + + } + + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + list_del_init (&lock->blocked_locks); + + STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL); + + GF_FREE ((char *)lock->basename); + GF_FREE (lock->connection_id); + GF_FREE (lock); + } + + return 0; +} + +/* Common entrylk code called by pl_entrylk and pl_fentrylk */ +int +pl_common_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, inode_t *inode, const char *basename, + entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd, + dict_t *xdata) + +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + char unwind = 1; + GF_UNUSED int dict_ret = -1; + pl_inode_t *pinode = NULL; + pl_entry_lock_t *unlocked = NULL; + pl_dom_list_t *dom = NULL; + char *conn_id = NULL; + pl_ctx_t *ctx = NULL; + + if (xdata) + dict_ret = dict_get_str (xdata, "connection-id", &conn_id); + + pinode = pl_inode_get (this, inode); + if (!pinode) { + op_errno = ENOMEM; + goto out; + } + + dom = get_domain (pinode, volume); + if (!dom){ + op_errno = ENOMEM; + goto out; + } + + entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type); + + if (frame->root->lk_owner.len == 0) { + /* + this is a special case that means release + all locks from this client + */ + + gf_log (this->name, GF_LOG_TRACE, + "Releasing locks for client %p", frame->root->client); + + release_entry_locks_for_client (this, pinode, dom, + frame->root->client); + op_ret = 0; + + goto out; + } + + switch (cmd) { + case ENTRYLK_LOCK: + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, dom, this, 0, conn_id); + } + pthread_mutex_unlock (&pinode->mutex); + + op_errno = -ret; + if (ret < 0) { + if (ret == -EAGAIN) + unwind = 0; + else + unwind = 1; + goto out; + } else { + op_ret = 0; + op_errno = 0; + unwind = 1; + goto out; + } + + break; + + case ENTRYLK_LOCK_NB: + unwind = 1; + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, dom, this, 1, conn_id); + } + pthread_mutex_unlock (&pinode->mutex); + + if (ret < 0) { + op_errno = -ret; + goto out; + } + + break; + + case ENTRYLK_UNLOCK: + pthread_mutex_lock (&pinode->mutex); + { + unlocked = __unlock_name (dom, basename, type); + } + pthread_mutex_unlock (&pinode->mutex); + + if (unlocked) + grant_blocked_entry_locks (this, pinode, unlocked, dom); + + break; + + default: + gf_log (this->name, GF_LOG_ERROR, + "Unexpected case in entrylk (cmd=%d). Please file" + "a bug report at http://bugs.gluster.com", cmd); + goto out; + } + + op_ret = 0; +out: + pl_update_refkeeper (this, inode); + if (unwind) { + entrylk_trace_out (this, frame, volume, fd, loc, basename, + cmd, type, op_ret, op_errno); + + ctx = pl_ctx_get (frame->root->client, this); + + if (ctx == NULL) { + gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; + } + + if (cmd == ENTRYLK_UNLOCK) + pl_del_locker (ctx->ltable, volume, loc, fd, + &frame->root->lk_owner, + GF_FOP_ENTRYLK); + else + pl_add_locker (ctx->ltable, volume, loc, fd, + frame->root->pid, + &frame->root->lk_owner, + GF_FOP_ENTRYLK); + +unwind: + STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL); + } else { + entrylk_trace_block (this, frame, volume, fd, loc, basename, + cmd, type); + } + + + return 0; +} + +/** + * pl_entrylk: + * + * Locking on names (directory entries) + */ + +int +pl_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +{ + pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, + type, loc, NULL, xdata); + + return 0; +} + + +/** + * pl_fentrylk: + * + * Locking on names (directory entries) + */ + +int +pl_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +{ + pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, + type, NULL, fd, xdata); + + return 0; +} + + +int32_t +__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode) +{ + int32_t count = 0; + pl_entry_lock_t *lock = NULL; + pl_dom_list_t *dom = NULL; + + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + count++; + } + + list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + count++; + } + + } + + return count; +} + +int32_t +get_entrylk_count (xlator_t *this, inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = 0; + int32_t count = 0; + + ret = inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret != 0) { + goto out; + } + + pl_inode = (pl_inode_t *)(long) tmp_pl_inode; + + pthread_mutex_lock (&pl_inode->mutex); + { + count = __get_entrylk_count (this, pl_inode); + } + pthread_mutex_unlock (&pl_inode->mutex); + +out: + return count; +} diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c new file mode 100644 index 000000000..508523e11 --- /dev/null +++ b/xlators/features/locks/src/inodelk.c @@ -0,0 +1,825 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +inline void +__delete_inode_lock (pl_inode_lock_t *lock) +{ + list_del (&lock->list); +} + +static inline void +__pl_inodelk_ref (pl_inode_lock_t *lock) +{ + lock->ref++; +} + +inline void +__pl_inodelk_unref (pl_inode_lock_t *lock) +{ + lock->ref--; + if (!lock->ref) { + GF_FREE (lock->connection_id); + GF_FREE (lock); + } +} + +/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */ +static inline int +inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK) + return 1; + + return 0; +} + +void +pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain) +{ + char *cmd_str = NULL; + char *type_str = NULL; + + switch (cmd) { +#if F_GETLK != F_GETLK64 + case F_GETLK64: +#endif + case F_GETLK: + cmd_str = "GETLK"; + break; + +#if F_SETLK != F_SETLK64 + case F_SETLK64: +#endif + case F_SETLK: + cmd_str = "SETLK"; + break; + +#if F_SETLKW != F_SETLKW64 + case F_SETLKW64: +#endif + case F_SETLKW: + cmd_str = "SETLKW"; + break; + + default: + cmd_str = "UNKNOWN"; + break; + } + + switch (flock->l_type) { + case F_RDLCK: + type_str = "READ"; + break; + case F_WRLCK: + type_str = "WRITE"; + break; + case F_UNLCK: + type_str = "UNLOCK"; + break; + default: + type_str = "UNKNOWN"; + break; + } + + snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, " + "domain: %s, start=%llu, len=%llu, pid=%llu", + cmd_str, type_str, domain, + (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid); +} + +/* Determine if the two inodelks overlap reach other's lock regions */ +static int +inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); +} + +/* Returns true if the 2 inodelks have the same owner */ +static inline int +same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + return (is_same_lkowner (&l1->owner, &l2->owner) && + (l1->client == l2->client)); +} + +/* Returns true if the 2 inodelks conflict with each other */ +static int +inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + return (inodelk_overlap (l1, l2) && + inodelk_type_conflict (l1, l2)); +} + +/* Determine if lock is grantable or not */ +static pl_inode_lock_t * +__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock) +{ + pl_inode_lock_t *l = NULL; + pl_inode_lock_t *ret = NULL; + if (list_empty (&dom->inodelk_list)) + goto out; + list_for_each_entry (l, &dom->inodelk_list, list){ + if (inodelk_conflict (lock, l) && + !same_inodelk_owner (lock, l)) { + ret = l; + goto out; + } + } +out: + return ret; +} + +static pl_inode_lock_t * +__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock) +{ + pl_inode_lock_t *l = NULL; + pl_inode_lock_t *ret = NULL; + + if (list_empty (&dom->blocked_inodelks)) + return NULL; + + list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) { + if (inodelk_conflict (lock, l)) { + ret = l; + goto out; + } + } + +out: + return ret; +} + +static int +__owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock) +{ + pl_inode_lock_t *lock = NULL; + + list_for_each_entry (lock, &dom->inodelk_list, list) { + if (same_inodelk_owner (lock, newlock)) + return 1; + } + + list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { + if (same_inodelk_owner (lock, newlock)) + return 1; + } + + return 0; +} + + +/* Determines if lock can be granted and adds the lock. If the lock + * is blocking, adds it to the blocked_inodelks list of the domain. + */ +static int +__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + int can_block, pl_dom_list_t *dom) +{ + pl_inode_lock_t *conf = NULL; + int ret = -EINVAL; + + conf = __inodelk_grantable (dom, lock); + if (conf){ + ret = -EAGAIN; + if (can_block == 0) + goto out; + + gettimeofday (&lock->blkd_time, NULL); + list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); + + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + + + goto out; + } + + if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) { + ret = -EAGAIN; + if (can_block == 0) + goto out; + + gettimeofday (&lock->blkd_time, NULL); + list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); + + gf_log (this->name, GF_LOG_TRACE, + "Lock is grantable, but blocking to prevent starvation"); + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + + + goto out; + } + __pl_inodelk_ref (lock); + gettimeofday (&lock->granted_time, NULL); + list_add (&lock->list, &dom->inodelk_list); + + ret = 0; + +out: + return ret; +} + +/* Return true if the two inodelks have exactly same lock boundaries */ +static int +inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + if ((l1->fl_start == l2->fl_start) && + (l1->fl_end == l2->fl_end)) + return 1; + + return 0; +} + + +static pl_inode_lock_t * +find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom) +{ + pl_inode_lock_t *l = NULL; + list_for_each_entry (l, &dom->inodelk_list, list) { + if (inodelks_equal (l, lock) && + same_inodelk_owner (l, lock)) + return l; + } + return NULL; +} + +/* Set F_UNLCK removes a lock which has the exact same lock boundaries + * as the UNLCK lock specifies. If such a lock is not found, returns invalid + */ +static pl_inode_lock_t * +__inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) +{ + + pl_inode_lock_t *conf = NULL; + + conf = find_matching_inodelk (lock, dom); + if (!conf) { + gf_log (this->name, GF_LOG_ERROR, + " Matching lock not found for unlock %llu-%llu, by %s " + "on %p", (unsigned long long)lock->fl_start, + (unsigned long long)lock->fl_end, + lkowner_utoa (&lock->owner), lock->client); + goto out; + } + __delete_inode_lock (conf); + gf_log (this->name, GF_LOG_DEBUG, + " Matching lock found for unlock %llu-%llu, by %s on %p", + (unsigned long long)lock->fl_start, + (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner), + lock->client); + +out: + return conf; +} +static void +__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom) +{ + int bl_ret = 0; + pl_inode_lock_t *bl = NULL; + pl_inode_lock_t *tmp = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + list_splice_init (&dom->blocked_inodelks, &blocked_list); + + list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) { + + list_del_init (&bl->blocked_locks); + + bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom); + + if (bl_ret == 0) { + list_add (&bl->blocked_locks, granted); + } + } + return; +} + +/* Grant all inodelks blocked on a lock */ +void +grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom) +{ + struct list_head granted; + pl_inode_lock_t *lock; + pl_inode_lock_t *tmp; + + INIT_LIST_HEAD (&granted); + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_inode_locks (this, pl_inode, &granted, dom); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + + pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, 0, 0, lock->volume); + + STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL); + } + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + list_del_init (&lock->blocked_locks); + __pl_inodelk_unref (lock); + } + } + pthread_mutex_unlock (&pl_inode->mutex); +} + +/* Release all inodelks from this client */ +static int +release_inode_locks_of_client (xlator_t *this, pl_dom_list_t *dom, + inode_t *inode, client_t *client) +{ + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *l = NULL; + + pl_inode_t * pinode = NULL; + + struct list_head released; + + char *path = NULL; + char *file = NULL; + + INIT_LIST_HEAD (&released); + + pinode = pl_inode_get (this, inode); + + pthread_mutex_lock (&pinode->mutex); + { + + list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) { + if (l->client != client) + continue; + + list_del_init (&l->blocked_locks); + + inode_path (inode, NULL, &path); + if (path) + file = path; + else + file = uuid_utoa (inode->gfid); + + gf_log (this->name, GF_LOG_DEBUG, + "releasing blocking lock on %s held by " + "{client=%p, pid=%"PRId64" lk-owner=%s}", + file, client, (uint64_t) l->client_pid, + lkowner_utoa (&l->owner)); + + list_add (&l->blocked_locks, &released); + if (path) { + GF_FREE (path); + path = NULL; + } + } + + list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) { + if (l->client != client) + continue; + + inode_path (inode, NULL, &path); + if (path) + file = path; + else + file = uuid_utoa (inode->gfid); + + gf_log (this->name, GF_LOG_DEBUG, + "releasing granted lock on %s held by " + "{client=%p, pid=%"PRId64" lk-owner=%s}", + file, client, (uint64_t) l->client_pid, + lkowner_utoa (&l->owner)); + + if (path) { + GF_FREE (path); + path = NULL; + } + + __delete_inode_lock (l); + __pl_inodelk_unref (l); + } + } + GF_FREE (path); + + pthread_mutex_unlock (&pinode->mutex); + + list_for_each_entry_safe (l, tmp, &released, blocked_locks) { + list_del_init (&l->blocked_locks); + + STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL); + //No need to take lock as the locks are only in one list + __pl_inodelk_unref (l); + } + + grant_blocked_inode_locks (this, pinode, dom); + return 0; +} + + +static int +pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + int can_block, pl_dom_list_t *dom) +{ + int ret = -EINVAL; + pl_inode_lock_t *retlock = NULL; + gf_boolean_t unref = _gf_true; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (lock->fl_type != F_UNLCK) { + ret = __lock_inodelk (this, pl_inode, lock, can_block, dom); + if (ret == 0) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->fl_start, + lock->fl_end); + } else if (ret == -EAGAIN) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + if (can_block) + unref = _gf_false; + } + } else { + retlock = __inode_unlock_lock (this, lock, dom); + if (!retlock) { + gf_log (this->name, GF_LOG_DEBUG, + "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; + } + __pl_inodelk_unref (retlock); + + ret = 0; + } + } +out: + if (unref) + __pl_inodelk_unref (lock); + pthread_mutex_unlock (&pl_inode->mutex); + grant_blocked_inode_locks (this, pl_inode, dom); + return ret; +} + +/* Create a new inode_lock_t */ +pl_inode_lock_t * +new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, + call_frame_t *frame, xlator_t *this, const char *volume, + char *conn_id) + +{ + pl_inode_lock_t *lock = NULL; + + lock = GF_CALLOC (1, sizeof (*lock), + gf_locks_mt_pl_inode_lock_t); + if (!lock) { + return NULL; + } + + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; + + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->client = client; + lock->client_pid = client_pid; + lock->volume = volume; + lock->owner = frame->root->lk_owner; + lock->frame = frame; + lock->this = this; + + if (conn_id) { + lock->connection_id = gf_strdup (conn_id); + } + + INIT_LIST_HEAD (&lock->list); + INIT_LIST_HEAD (&lock->blocked_locks); + __pl_inodelk_ref (lock); + + return lock; +} + +int32_t +_pl_convert_volume (const char *volume, char **res) +{ + char *mdata_vol = NULL; + int ret = 0; + + mdata_vol = strrchr (volume, ':'); + //if the volume already ends with :metadata don't bother + if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0)) + return 0; + + ret = gf_asprintf (res, "%s:metadata", volume); + if (ret <= 0) + return ENOMEM; + return 0; +} + +int32_t +_pl_convert_volume_for_special_range (struct gf_flock *flock, + const char *volume, char **res) +{ + int32_t ret = 0; + + if ((flock->l_start == LLONG_MAX -1) && + (flock->l_len == 0)) { + ret = _pl_convert_volume (volume, res); + } + + return ret; +} + +/* Common inodelk code called from pl_inodelk and pl_finodelk */ +int +pl_common_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, inode_t *inode, int32_t cmd, + struct gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + GF_UNUSED int dict_ret = -1; + int can_block = 0; + pl_inode_t * pinode = NULL; + pl_inode_lock_t * reqlock = NULL; + pl_dom_list_t * dom = NULL; + char *res = NULL; + char *res1 = NULL; + char *conn_id = NULL; + pl_ctx_t *ctx = NULL; + + if (xdata) + dict_ret = dict_get_str (xdata, "connection-id", &conn_id); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (inode, unwind); + VALIDATE_OR_GOTO (flock, unwind); + + if ((flock->l_start < 0) || (flock->l_len < 0)) { + op_errno = EINVAL; + goto unwind; + } + + op_errno = _pl_convert_volume_for_special_range (flock, volume, &res); + if (op_errno) + goto unwind; + if (res) + volume = res; + + pl_trace_in (this, frame, fd, loc, cmd, flock, volume); + + pinode = pl_inode_get (this, inode); + if (!pinode) { + op_errno = ENOMEM; + goto unwind; + } + + dom = get_domain (pinode, volume); + if (!dom) { + op_errno = ENOMEM; + goto unwind; + } + + if (frame->root->lk_owner.len == 0) { + /* + special case: this means release all locks + from this client + */ + gf_log (this->name, GF_LOG_TRACE, + "Releasing all locks from client %p", frame->root->client); + + release_inode_locks_of_client (this, dom, inode, frame->root->client); + _pl_convert_volume (volume, &res1); + if (res1) { + dom = get_domain (pinode, res1); + if (dom) + release_inode_locks_of_client (this, dom, + inode, frame->root->client); + } + + op_ret = 0; + goto unwind; + } + + reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid, + frame, this, volume, conn_id); + + if (!reqlock) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + + switch (cmd) { + case F_SETLKW: + can_block = 1; + + /* fall through */ + + case F_SETLK: + memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); + ret = pl_inode_setlk (this, pinode, reqlock, + can_block, dom); + + if (ret < 0) { + if ((can_block) && (F_UNLCK != flock->l_type)) { + pl_trace_block (this, frame, fd, loc, + cmd, flock, volume); + goto out; + } + gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN"); + op_errno = -ret; + goto unwind; + } + break; + + default: + op_errno = ENOTSUP; + gf_log (this->name, GF_LOG_DEBUG, + "Lock command F_GETLK not supported for [f]inodelk " + "(cmd=%d)", + cmd); + goto unwind; + } + + op_ret = 0; + + ctx = pl_ctx_get (frame->root->client, this); + + if (ctx == NULL) { + gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; + } + + if (flock->l_type == F_UNLCK) + pl_del_locker (ctx->ltable, volume, loc, fd, + &frame->root->lk_owner, + GF_FOP_INODELK); + else + pl_add_locker (ctx->ltable, volume, loc, fd, + frame->root->pid, + &frame->root->lk_owner, + GF_FOP_INODELK); + +unwind: + if ((inode != NULL) && (flock !=NULL)) { + pl_update_refkeeper (this, inode); + pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume); + } + + STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL); +out: + GF_FREE (res); + GF_FREE (res1); + return 0; +} + +int +pl_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, + dict_t *xdata) +{ + pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, + loc, NULL, xdata); + + return 0; +} + +int +pl_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, + dict_t *xdata) +{ + pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, + NULL, fd, xdata); + + return 0; + +} + +static inline int32_t +__get_inodelk_dom_count (pl_dom_list_t *dom) +{ + pl_inode_lock_t *lock = NULL; + int32_t count = 0; + + list_for_each_entry (lock, &dom->inodelk_list, list) { + count++; + } + list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { + count++; + } + return count; +} + +/* Returns the no. of locks (blocked/granted) held on a given domain name + * If @domname is NULL, returns the no. of locks in all the domains present. + * If @domname is non-NULL and non-existent, returns 0 */ +int32_t +__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname) +{ + int32_t count = 0; + pl_dom_list_t *dom = NULL; + + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + if (domname) { + if (strcmp (domname, dom->domain) == 0) { + count = __get_inodelk_dom_count (dom); + goto out; + } + + } else { + /* Counting locks from all domains */ + count += __get_inodelk_dom_count (dom); + + } + } + +out: + return count; +} + +int32_t +get_inodelk_count (xlator_t *this, inode_t *inode, char *domname) +{ + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = 0; + int32_t count = 0; + + ret = inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret != 0) { + goto out; + } + + pl_inode = (pl_inode_t *)(long) tmp_pl_inode; + + pthread_mutex_lock (&pl_inode->mutex); + { + count = __get_inodelk_count (this, pl_inode, domname); + } + pthread_mutex_unlock (&pl_inode->mutex); + +out: + return count; +} diff --git a/xlators/features/locks/src/internal.c b/xlators/features/locks/src/internal.c deleted file mode 100644 index 6524721b4..000000000 --- a/xlators/features/locks/src/internal.c +++ /dev/null @@ -1,896 +0,0 @@ -/* - Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" - -#include "locks.h" -#include "common.h" - - -static int -release_inode_locks_of_transport (xlator_t *this, - inode_t *inode, transport_t *trans) -{ - posix_lock_t *tmp = NULL; - posix_lock_t *l = NULL; - - pl_inode_t * pinode = NULL; - - struct list_head granted; - - char *path = NULL; - - INIT_LIST_HEAD (&granted); - - pinode = pl_inode_get (this, inode); - - pthread_mutex_lock (&pinode->mutex); - { - if (list_empty (&pinode->int_list)) { - goto unlock; - } - - list_for_each_entry_safe (l, tmp, &pinode->int_list, list) { - if (l->transport != trans) - continue; - - list_del_init (&l->list); - - __delete_lock (pinode, l); - - inode_path (inode, NULL, &path); - - gf_log (this->name, GF_LOG_TRACE, - "releasing lock on %s held by " - "{transport=%p, pid=%"PRId64"}", - path, trans, - (uint64_t) l->client_pid); - - if (path) - FREE (path); - - __destroy_lock (l); - } - } -unlock: - pthread_mutex_unlock (&pinode->mutex); - - grant_blocked_locks (this, pinode, GF_LOCK_INTERNAL); - - return 0; -} - - -/** - * pl_inodelk: - * - * This fop provides fcntl-style locking on files for internal - * purposes. Locks held through this fop reside in a domain different - * from those held by applications. This fop is for the use of AFR. - */ - -int -pl_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *flock) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - int can_block = 0; - posix_locks_private_t * priv = NULL; - transport_t * transport = NULL; - pid_t client_pid = -1; - pl_inode_t * pinode = NULL; - posix_lock_t * reqlock = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (flock, out); - - if ((flock->l_start < 0) || (flock->l_len < 0)) { - op_errno = EINVAL; - goto unwind; - } - - transport = frame->root->trans; - client_pid = frame->root->pid; - - priv = (posix_locks_private_t *) this->private; - - VALIDATE_OR_GOTO (priv, out); - - pinode = pl_inode_get (this, loc->inode); - if (!pinode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - goto unwind; - } - - if (client_pid == 0) { - /* - special case: this means release all locks - from this transport - */ - gf_log (this->name, GF_LOG_TRACE, - "Releasing all locks from transport %p", transport); - - release_inode_locks_of_transport (this, loc->inode, transport); - goto unwind; - } - - reqlock = new_posix_lock (flock, transport, client_pid); - if (!reqlock) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - switch (cmd) { - case F_SETLKW: - can_block = 1; - reqlock->frame = frame; - reqlock->this = this; - - /* fall through */ - - case F_SETLK: - memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); - ret = pl_setlk (this, pinode, reqlock, - can_block, GF_LOCK_INTERNAL); - - if (ret == -1) { - if (can_block) - goto out; - - gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN"); - op_errno = EAGAIN; - __destroy_lock (reqlock); - goto unwind; - } - break; - - default: - op_errno = ENOTSUP; - gf_log (this->name, GF_LOG_ERROR, - "Unexpected case in inodelk (cmd=%d). " - "Please file a bug report at http://bugs.gluster.com", - cmd); - goto unwind; - } - - op_ret = 0; - -unwind: - STACK_UNWIND (frame, op_ret, op_errno); -out: - return 0; -} - - -int -pl_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct flock *flock) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - int can_block = 0; - posix_locks_private_t * priv = NULL; - transport_t * transport = NULL; - pid_t client_pid = -1; - pl_inode_t * pinode = NULL; - posix_lock_t * reqlock = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (flock, out); - - if ((flock->l_start < 0) || (flock->l_len < 0)) { - op_errno = EINVAL; - goto unwind; - } - - transport = frame->root->trans; - client_pid = frame->root->pid; - - priv = (posix_locks_private_t *) this->private; - - VALIDATE_OR_GOTO (priv, out); - - pinode = pl_inode_get (this, fd->inode); - if (!pinode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - goto unwind; - } - - if (client_pid == 0) { - /* - special case: this means release all locks - from this transport - */ - gf_log (this->name, GF_LOG_TRACE, - "Releasing all locks from transport %p", transport); - - release_inode_locks_of_transport (this, fd->inode, transport); - goto unwind; - } - - reqlock = new_posix_lock (flock, transport, client_pid); - if (!reqlock) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - switch (cmd) { - case F_SETLKW: - can_block = 1; - reqlock->frame = frame; - reqlock->this = this; - reqlock->fd = fd; - - /* fall through */ - - case F_SETLK: - memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); - ret = pl_setlk (this, pinode, reqlock, - can_block, GF_LOCK_INTERNAL); - - if (ret == -1) { - if (can_block) - goto out; - - gf_log (this->name, GF_LOG_TRACE, "Returning EAGAIN"); - op_errno = EAGAIN; - __destroy_lock (reqlock); - goto unwind; - } - break; - - default: - op_errno = ENOTSUP; - gf_log (this->name, GF_LOG_ERROR, - "Unexpected case in finodelk (cmd=%d). " - "Please file a bug report at http://bugs.gluster.com", - cmd); - goto unwind; - } - - op_ret = 0; - -unwind: - STACK_UNWIND (frame, op_ret, op_errno); -out: - return 0; -} - - -/** - * types_conflict - do two types of lock conflict? - * @t1: type - * @t2: type - * - * two read locks do not conflict - * any other case conflicts - */ - -static int -types_conflict (entrylk_type t1, entrylk_type t2) -{ - return !((t1 == ENTRYLK_RDLCK) && (t2 == ENTRYLK_RDLCK)); -} - -/** - * all_names - does a basename represent all names? - * @basename: name to check - */ - -#define all_names(basename) ((basename == NULL) ? 1 : 0) - -/** - * names_conflict - do two names conflict? - * @n1: name - * @n2: name - */ - -static int -names_conflict (const char *n1, const char *n2) -{ - return all_names (n1) || all_names (n2) || !strcmp (n1, n2); -} - - -static int -names_equal (const char *n1, const char *n2) -{ - return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); -} - -/** - * lock_grantable - is this lock grantable? - * @inode: inode in which to look - * @basename: name we're trying to lock - * @type: type of lock - */ - -static pl_entry_lock_t * -__lock_grantable (pl_inode_t *pinode, const char *basename, entrylk_type type) -{ - pl_entry_lock_t *lock = NULL; - - if (list_empty (&pinode->dir_list)) - return NULL; - - list_for_each_entry (lock, &pinode->dir_list, inode_list) { - if (names_conflict (lock->basename, basename) && - types_conflict (lock->type, type)) - return lock; - } - - return NULL; -} - -/** - * find_most_matching_lock - find the lock struct which most matches in order of: - * lock on the exact basename || - * an all_names lock - * - * - * @inode: inode in which to look - * @basename: name to search for - */ - -static pl_entry_lock_t * -__find_most_matching_lock (pl_inode_t *pinode, const char *basename) -{ - pl_entry_lock_t *lock; - pl_entry_lock_t *all = NULL; - pl_entry_lock_t *exact = NULL; - - if (list_empty (&pinode->dir_list)) - return NULL; - - list_for_each_entry (lock, &pinode->dir_list, inode_list) { - if (all_names (lock->basename)) - all = lock; - else if (names_equal (lock->basename, basename)) - exact = lock; - } - - return (exact ? exact : all); -} - - -/** - * insert_new_lock - insert a new dir lock into the inode with the given parameters - * @pinode: inode to insert into - * @basename: basename for the lock - * @type: type of the lock - */ - -static pl_entry_lock_t * -new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, - transport_t *trans) -{ - pl_entry_lock_t *newlock = NULL; - - newlock = CALLOC (sizeof (pl_entry_lock_t), 1); - if (!newlock) { - goto out; - } - - newlock->basename = basename ? strdup (basename) : NULL; - newlock->type = type; - newlock->trans = trans; - - if (type == ENTRYLK_RDLCK) - newlock->read_count = 1; - - INIT_LIST_HEAD (&newlock->inode_list); - INIT_LIST_HEAD (&newlock->blocked_locks); - -out: - return newlock; -} - -/** - * lock_name - lock a name in a directory - * @inode: inode for the directory in which to lock - * @basename: name of the entry to lock - * if null, lock the entire directory - * - * the entire directory being locked is represented as: a single - * pl_entry_lock_t present in the entrylk_locks list with its - * basename = NULL - */ - -int -__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, - call_frame_t *frame, xlator_t *this, int nonblock) -{ - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *conf = NULL; - - transport_t *trans = NULL; - - int ret = -EINVAL; - - trans = frame->root->trans; - - conf = __lock_grantable (pinode, basename, type); - if (conf) { - ret = -EAGAIN; - if (nonblock) - goto out; - - lock = new_entrylk_lock (pinode, basename, type, trans); - - if (!lock) { - ret = -ENOMEM; - goto out; - } - - gf_log (this->name, GF_LOG_TRACE, - "Blocking lock: {pinode=%p, basename=%s}", - pinode, basename); - - lock->frame = frame; - lock->this = this; - lock->blocked = 1; - - list_add (&lock->blocked_locks, &conf->blocked_locks); - - - goto out; - } - - switch (type) { - case ENTRYLK_RDLCK: - lock = __find_most_matching_lock (pinode, basename); - - if (lock && names_equal (lock->basename, basename)) { - lock->read_count++; - - FREE (lock->basename); - FREE (lock); - - lock = NULL; - } else { - lock = new_entrylk_lock (pinode, basename, type, trans); - - if (!lock) { - ret = -ENOMEM; - goto out; - } - - list_add (&lock->inode_list, &pinode->dir_list); - } - break; - - case ENTRYLK_WRLCK: - lock = new_entrylk_lock (pinode, basename, type, trans); - - if (!lock) { - ret = -ENOMEM; - goto out; - } - - list_add (&lock->inode_list, &pinode->dir_list); - break; - } - - ret = 0; -out: - return ret; -} - - -/** - * unlock_name - unlock a name in a directory - * @inode: inode for the directory to unlock in - * @basename: name of the entry to unlock - * if null, unlock the entire directory - */ - -pl_entry_lock_t * -__unlock_name (pl_inode_t *pinode, const char *basename, entrylk_type type) -{ - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *ret_lock = NULL; - - lock = __find_most_matching_lock (pinode, basename); - - if (!lock) { - gf_log ("locks", GF_LOG_DEBUG, - "unlock on %s (type=%s) attempted but no matching lock found", - basename, type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : - "ENTRYLK_WRLCK"); - goto out; - } - - if (names_equal (lock->basename, basename) - && lock->type == type) { - if (type == ENTRYLK_RDLCK) { - lock->read_count--; - } - if (type == ENTRYLK_WRLCK || lock->read_count == 0) { - list_del (&lock->inode_list); - ret_lock = lock; - } - } else { - gf_log ("locks", GF_LOG_DEBUG, - "Unlock for a non-existing lock!"); - goto out; - } - -out: - return ret_lock; -} - - -void -__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_entry_lock_t *lock, - struct list_head *granted) -{ - int bl_ret = 0; - pl_entry_lock_t *bl = NULL; - pl_entry_lock_t *tmp = NULL; - - list_for_each_entry_safe (bl, tmp, &lock->blocked_locks, - blocked_locks) { - list_del_init (&bl->blocked_locks); - - /* TODO: error checking */ - - gf_log ("locks", GF_LOG_TRACE, - "Trying to unblock: {pinode=%p, basename=%s}", - pl_inode, bl->basename); - - bl_ret = __lock_name (pl_inode, bl->basename, bl->type, - bl->frame, bl->this, 0); - - if (bl_ret == 0) { - list_add (&bl->blocked_locks, granted); - } else { - if (bl->basename) - FREE (bl->basename); - FREE (bl); - } - } - return; -} - - -void -grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_entry_lock_t *unlocked) -{ - struct list_head granted_list; - pl_entry_lock_t *tmp = NULL; - pl_entry_lock_t *lock = NULL; - - INIT_LIST_HEAD (&granted_list); - - pthread_mutex_lock (&pl_inode->mutex); - { - __grant_blocked_entry_locks (this, pl_inode, unlocked, - &granted_list); - } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { - list_del_init (&lock->blocked_locks); - - STACK_UNWIND (lock->frame, 0, 0); - - FREE (lock->basename); - FREE (lock); - } - - FREE (unlocked->basename); - FREE (unlocked); - - return; -} - - -/** - * release_entry_locks_for_transport: release all entry locks from this - * transport for this loc_t - */ - -static int -release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode, - transport_t *trans) -{ - pl_entry_lock_t *lock; - pl_entry_lock_t *tmp; - struct list_head granted; - - INIT_LIST_HEAD (&granted); - - pthread_mutex_lock (&pinode->mutex); - { - if (list_empty (&pinode->dir_list)) { - goto unlock; - } - - list_for_each_entry_safe (lock, tmp, &pinode->dir_list, - inode_list) { - if (lock->trans != trans) - continue; - - list_del_init (&lock->inode_list); - __grant_blocked_entry_locks (this, pinode, lock, - &granted); - - FREE (lock->basename); - FREE (lock); - } - } -unlock: - pthread_mutex_unlock (&pinode->mutex); - - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { - list_del_init (&lock->blocked_locks); - - STACK_UNWIND (lock->frame, 0, 0); - - FREE (lock->basename); - FREE (lock); - } - - return 0; -} - - -/** - * pl_entrylk: - * - * Locking on names (directory entries) - */ - -int -pl_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - - transport_t * transport = NULL; - pid_t pid = -1; - - pl_inode_t * pinode = NULL; - int ret = -1; - pl_entry_lock_t *unlocked = NULL; - char unwind = 1; - - - pinode = pl_inode_get (this, loc->inode); - if (!pinode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - goto out; - } - - pid = frame->root->pid; - transport = frame->root->trans; - - if (pid == 0) { - /* - this is a special case that means release - all locks from this transport - */ - - gf_log (this->name, GF_LOG_TRACE, - "Releasing locks for transport %p", transport); - - release_entry_locks_for_transport (this, pinode, transport); - op_ret = 0; - - goto out; - } - - switch (cmd) { - case ENTRYLK_LOCK: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, this, 0); - } - pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - if (ret == -EAGAIN) - unwind = 0; - op_errno = -ret; - goto out; - } - - break; - - case ENTRYLK_LOCK_NB: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, this, 1); - } - pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - op_errno = -ret; - goto out; - } - - break; - - case ENTRYLK_UNLOCK: - pthread_mutex_lock (&pinode->mutex); - { - unlocked = __unlock_name (pinode, basename, type); - } - pthread_mutex_unlock (&pinode->mutex); - - if (unlocked) - grant_blocked_entry_locks (this, pinode, unlocked); - - break; - - default: - gf_log (this->name, GF_LOG_ERROR, - "Unexpected case in entrylk (cmd=%d). Please file" - "a bug report at http://bugs.gluster.com", cmd); - goto out; - } - - op_ret = 0; -out: - if (unwind) { - STACK_UNWIND (frame, op_ret, op_errno); - } - - return 0; -} - - -/** - * pl_entrylk: - * - * Locking on names (directory entries) - */ - -int -pl_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - - transport_t * transport = NULL; - pid_t pid = -1; - - pl_inode_t * pinode = NULL; - int ret = -1; - pl_entry_lock_t *unlocked = NULL; - char unwind = 1; - - pinode = pl_inode_get (this, fd->inode); - if (!pinode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory :("); - goto out; - } - - pid = frame->root->pid; - transport = frame->root->trans; - - if (pid == 0) { - /* - this is a special case that means release - all locks from this transport - */ - - gf_log (this->name, GF_LOG_TRACE, - "Releasing locks for transport %p", transport); - - release_entry_locks_for_transport (this, pinode, transport); - op_ret = 0; - goto out; - } - - switch (cmd) { - case ENTRYLK_LOCK: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, this, 0); - } - pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - if (ret == -EAGAIN) - unwind = 0; - op_errno = -ret; - goto out; - } - break; - - case ENTRYLK_LOCK_NB: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, this, 1); - } - pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - op_errno = -ret; - goto out; - } - break; - - case ENTRYLK_UNLOCK: - pthread_mutex_lock (&pinode->mutex); - { - unlocked = __unlock_name (pinode, basename, type); - } - pthread_mutex_unlock (&pinode->mutex); - - if (unlocked) - grant_blocked_entry_locks (this, pinode, unlocked); - break; - - default: - gf_log (this->name, GF_LOG_ERROR, - "Unexpected case in fentrylk (cmd=%d). " - "Please file a bug report at http://bugs.gluster.com", - cmd); - goto out; - } - - op_ret = 0; -out: - if (unwind) { - STACK_UNWIND (frame, op_ret, op_errno); - } - - return 0; -} diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h new file mode 100644 index 000000000..08aeb0a79 --- /dev/null +++ b/xlators/features/locks/src/locks-mem-types.h @@ -0,0 +1,29 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __LOCKS_MEM_TYPES_H__ +#define __LOCKS_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_locks_mem_types_ { + gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1, + gf_locks_mt_pl_inode_t, + gf_locks_mt_posix_lock_t, + gf_locks_mt_pl_entry_lock_t, + gf_locks_mt_pl_inode_lock_t, + gf_locks_mt_truncate_ops, + gf_locks_mt_pl_rw_req_t, + gf_locks_mt_posix_locks_private_t, + gf_locks_mt_pl_fdctx_t, + gf_locks_mt_end +}; +#endif + diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 5a834657d..76fc941d7 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - #ifndef __POSIX_LOCKS_H__ #define __POSIX_LOCKS_H__ @@ -26,86 +16,177 @@ #endif #include "compat-errno.h" -#include "transport.h" #include "stack.h" #include "call-stub.h" +#include "locks-mem-types.h" +#include "client_t.h" + +#include "lkowner.h" struct __pl_fd; struct __posix_lock { - struct list_head list; + struct list_head list; + + short fl_type; + off_t fl_start; + off_t fl_end; - short fl_type; - off_t fl_start; - off_t fl_end; + short blocked; /* waiting to acquire */ + struct gf_flock user_flock; /* the flock supplied by the user */ + xlator_t *this; /* required for blocked locks */ + unsigned long fd_num; - short blocked; /* waiting to acquire */ - struct flock user_flock; /* the flock supplied by the user */ - xlator_t *this; /* required for blocked locks */ - fd_t *fd; + fd_t *fd; + call_frame_t *frame; - call_frame_t *frame; + struct timeval blkd_time; /*time at which lock was queued into blkd list*/ + struct timeval granted_time; /*time at which lock was queued into active list*/ - /* These two together serve to uniquely identify each process - across nodes */ + /* These two together serve to uniquely identify each process + across nodes */ - transport_t *transport; /* to identify client node */ - pid_t client_pid; /* pid of client process */ + void *client; /* to identify client node */ + gf_lkowner_t owner; + pid_t client_pid; /* pid of client process */ }; typedef struct __posix_lock posix_lock_t; +struct __pl_inode_lock { + struct list_head list; + struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */ + int ref; + + short fl_type; + off_t fl_start; + off_t fl_end; + + const char *volume; + + struct gf_flock user_flock; /* the flock supplied by the user */ + xlator_t *this; /* required for blocked locks */ + fd_t *fd; + + call_frame_t *frame; + + struct timeval blkd_time; /*time at which lock was queued into blkd list*/ + struct timeval granted_time; /*time at which lock was queued into active list*/ + + /* These two together serve to uniquely identify each process + across nodes */ + + void *client; /* to identify client node */ + gf_lkowner_t owner; + pid_t client_pid; /* pid of client process */ + + char *connection_id; /* stores the client connection id */ +}; +typedef struct __pl_inode_lock pl_inode_lock_t; + struct __pl_rw_req_t { - struct list_head list; - call_stub_t *stub; - posix_lock_t region; + struct list_head list; + call_stub_t *stub; + posix_lock_t region; }; typedef struct __pl_rw_req_t pl_rw_req_t; +struct __pl_dom_list_t { + struct list_head inode_list; /* list_head back to pl_inode_t */ + const char *domain; + struct list_head entrylk_list; /* List of entry locks */ + struct list_head blocked_entrylks; /* List of all blocked entrylks */ + struct list_head inodelk_list; /* List of inode locks */ + struct list_head blocked_inodelks; /* List of all blocked inodelks */ +}; +typedef struct __pl_dom_list_t pl_dom_list_t; struct __entry_lock { - struct list_head inode_list; /* list_head back to pl_inode_t */ - struct list_head blocked_locks; /* locks blocked due to this lock */ - - call_frame_t *frame; - xlator_t *this; - int blocked; - - const char *basename; - entrylk_type type; - unsigned int read_count; /* number of read locks */ - transport_t *trans; + struct list_head domain_list; /* list_head back to pl_dom_list_t */ + struct list_head blocked_locks; /* list_head back to blocked_entrylks */ + + call_frame_t *frame; + xlator_t *this; + + const char *volume; + + const char *basename; + entrylk_type type; + + struct timeval blkd_time; /*time at which lock was queued into blkd list*/ + struct timeval granted_time; /*time at which lock was queued into active list*/ + + void *trans; + gf_lkowner_t owner; + pid_t client_pid; /* pid of client process */ + + char *connection_id; /* stores the client connection id */ }; typedef struct __entry_lock pl_entry_lock_t; -/* The "simulated" inode. This contains a list of all the locks associated +/* The "simulated" inode. This contains a list of all the locks associated with this file */ struct __pl_inode { - pthread_mutex_t mutex; - - struct list_head dir_list; /* list of entry locks */ - struct list_head ext_list; /* list of fcntl locks */ - struct list_head int_list; /* list of internal locks */ - struct list_head rw_list; /* list of waiting r/w requests */ - int mandatory; /* if mandatory locking is enabled */ + pthread_mutex_t mutex; + + struct list_head dom_list; /* list of domains */ + struct list_head ext_list; /* list of fcntl locks */ + struct list_head rw_list; /* list of waiting r/w requests */ + struct list_head reservelk_list; /* list of reservelks */ + struct list_head blocked_reservelks; /* list of blocked reservelks */ + struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/ + int mandatory; /* if mandatory locking is enabled */ + + inode_t *refkeeper; /* hold refs on an inode while locks are + held to prevent pruning */ }; typedef struct __pl_inode pl_inode_t; -#define LOCKS_FOR_DOMAIN(inode,domain) (domain == GF_LOCK_POSIX \ - ? inode->fcntl_locks \ - : inode->inodelk_locks) - struct __pl_fd { - gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */ + gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */ }; typedef struct __pl_fd pl_fd_t; typedef struct { - gf_boolean_t mandatory; /* if mandatory locking is enabled */ + gf_boolean_t mandatory; /* if mandatory locking is enabled */ + gf_boolean_t trace; /* trace lock requests in and out */ + char *brickname; } posix_locks_private_t; +typedef struct { + gf_boolean_t entrylk_count_req; + gf_boolean_t inodelk_count_req; + gf_boolean_t inodelk_dom_count_req; + gf_boolean_t posixlk_count_req; + gf_boolean_t parent_entrylk_req; + + /* used by {f,}truncate */ + loc_t loc; + fd_t *fd; + off_t offset; + dict_t *xdata; + enum {TRUNCATE, FTRUNCATE} op; +} pl_local_t; + + +typedef struct { + struct list_head locks_list; +} pl_fdctx_t; + + +typedef struct _locks_ctx { + gf_lock_t ltable_lock; /* only for replace, + ltable has its own internal + lock for operations */ + struct _lock_table *ltable; +} pl_ctx_t; + + +pl_ctx_t * +pl_ctx_get (client_t *client, xlator_t *xlator); + #endif /* __POSIX_LOCKS_H__ */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 7389e1e6a..7bfb38a51 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #include <unistd.h> #include <fcntl.h> #include <limits.h> @@ -37,6 +27,9 @@ #include "locks.h" #include "common.h" #include "statedump.h" +#include "clear.h" +#include "defaults.h" +#include "syncop.h" #ifndef LLONG_MAX #define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */ @@ -47,887 +40,2733 @@ void do_blocked_rw (pl_inode_t *); static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t); +static int format_brickname(char *); +int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *); +static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); -struct _truncate_ops { - loc_t loc; - fd_t *fd; - off_t offset; - enum {TRUNCATE, FTRUNCATE} op; -}; +static pl_fdctx_t * +pl_new_fdctx () +{ + pl_fdctx_t *fdctx = NULL; + + fdctx = GF_CALLOC (1, sizeof (*fdctx), + gf_locks_mt_pl_fdctx_t); + GF_VALIDATE_OR_GOTO ("posix-locks", fdctx, out); + + INIT_LIST_HEAD (&fdctx->locks_list); + +out: + return fdctx; +} + +static pl_fdctx_t * +pl_check_n_create_fdctx (xlator_t *this, fd_t *fd) +{ + int ret = 0; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + + GF_VALIDATE_OR_GOTO ("posix-locks", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + LOCK (&fd->lock); + { + ret = __fd_ctx_get (fd, this, &tmp); + if ((ret != 0) || (tmp == 0)) { + fdctx = pl_new_fdctx (); + if (fdctx == NULL) { + goto unlock; + } + } + + ret = __fd_ctx_set (fd, this, (uint64_t)(long)fdctx); + if (ret != 0) { + GF_FREE (fdctx); + fdctx = NULL; + gf_log (this->name, GF_LOG_DEBUG, + "failed to set fd ctx"); + } + } +unlock: + UNLOCK (&fd->lock); +out: + return fdctx; +} int pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *buf) + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - struct _truncate_ops *local = NULL; + pl_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op == TRUNCATE) - loc_wipe (&local->loc); + if (local->op == TRUNCATE) + loc_wipe (&local->loc); - STACK_UNWIND (frame, op_ret, op_errno, buf); - return 0; + if (local->xdata) + dict_unref (local->xdata); + if (local->fd) + fd_unref (local->fd); + + STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, + prebuf, postbuf, xdata); + return 0; } static int -truncate_allowed (pl_inode_t *pl_inode, - transport_t *transport, pid_t client_pid, - off_t offset) +truncate_allowed (pl_inode_t *pl_inode, + client_t *client, pid_t client_pid, + gf_lkowner_t *owner, off_t offset) { - posix_lock_t *l = NULL; - posix_lock_t region = {.list = {0, }, }; - int ret = 1; - - region.fl_start = offset; - region.fl_end = LLONG_MAX; - region.transport = transport; - region.client_pid = client_pid; - - pthread_mutex_lock (&pl_inode->mutex); - { - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (!l->blocked - && locks_overlap (®ion, l) - && !same_owner (®ion, l)) { - ret = 0; - break; - } - } - } - pthread_mutex_unlock (&pl_inode->mutex); + posix_lock_t *l = NULL; + posix_lock_t region = {.list = {0, }, }; + int ret = 1; + + region.fl_start = offset; + region.fl_end = LLONG_MAX; + region.client = client; + region.client_pid = client_pid; + region.owner = *owner; + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (!l->blocked + && locks_overlap (®ion, l) + && !same_owner (®ion, l)) { + ret = 0; + gf_log ("posix-locks", GF_LOG_TRACE, "Truncate " + "allowed"); + break; + } + } + } + pthread_mutex_unlock (&pl_inode->mutex); - return ret; + return ret; } static int truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - posix_locks_private_t *priv = NULL; - struct _truncate_ops *local = NULL; - inode_t *inode = NULL; - pl_inode_t *pl_inode = NULL; + posix_locks_private_t *priv = NULL; + pl_local_t *local = NULL; + inode_t *inode = NULL; + pl_inode_t *pl_inode = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - if (op_ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "got error (errno=%d, stderror=%s) from child", - op_errno, strerror (op_errno)); - goto unwind; - } + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "got error (errno=%d, stderror=%s) from child", + op_errno, strerror (op_errno)); + goto unwind; + } - if (local->op == TRUNCATE) - inode = local->loc.inode; - else - inode = local->fd->inode; + if (local->op == TRUNCATE) + inode = local->loc.inode; + else + inode = local->fd->inode; - pl_inode = pl_inode_get (this, inode); - if (!pl_inode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); + pl_inode = pl_inode_get (this, inode); + if (!pl_inode) { op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - if (priv->mandatory - && pl_inode->mandatory - && !truncate_allowed (pl_inode, frame->root->trans, - frame->root->pid, local->offset)) { + op_errno = ENOMEM; + goto unwind; + } + + if (priv->mandatory + && pl_inode->mandatory + && !truncate_allowed (pl_inode, frame->root->client, + frame->root->pid, &frame->root->lk_owner, + local->offset)) { op_ret = -1; - op_errno = EAGAIN; - goto unwind; - } - - switch (local->op) { - case TRUNCATE: - STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->truncate, - &local->loc, local->offset); - break; - case FTRUNCATE: - STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ftruncate, - local->fd, local->offset); - break; - } - - return 0; + op_errno = EAGAIN; + goto unwind; + } -unwind: - if (local->op == TRUNCATE) - loc_wipe (&local->loc); + switch (local->op) { + case TRUNCATE: + STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->truncate, + &local->loc, local->offset, local->xdata); + break; + case FTRUNCATE: + STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->ftruncate, + local->fd, local->offset, local->xdata); + break; + } - STACK_UNWIND (frame, op_ret, op_errno, buf); - return 0; + return 0; + +unwind: + gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, " + "error: %s", op_ret, strerror (op_errno)); + if (local->op == TRUNCATE) + loc_wipe (&local->loc); + if (local->xdata) + dict_unref (local->xdata); + if (local->fd) + fd_unref (local->fd); + + STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata); + return 0; } int pl_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset) + loc_t *loc, off_t offset, dict_t *xdata) { - struct _truncate_ops *local = NULL; + pl_local_t *local = NULL; - local = CALLOC (1, sizeof (struct _truncate_ops)); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto unwind; - } + local = mem_get0 (this->local_pool); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); - local->op = TRUNCATE; - local->offset = offset; - loc_copy (&local->loc, loc); + local->op = TRUNCATE; + local->offset = offset; + loc_copy (&local->loc, loc); + if (xdata) + local->xdata = dict_ref (xdata); - frame->local = local; + frame->local = local; - STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc); + STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->stat, loc, NULL); - return 0; + return 0; unwind: - STACK_UNWIND (frame, -1, ENOMEM, NULL); + gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, " + "error: %s", loc->path, -1, strerror (ENOMEM)); + STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } int pl_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) + fd_t *fd, off_t offset, dict_t *xdata) { - struct _truncate_ops *local = NULL; + pl_local_t *local = NULL; - local = CALLOC (1, sizeof (struct _truncate_ops)); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto unwind; - } + local = mem_get0 (this->local_pool); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); - local->op = FTRUNCATE; - local->offset = offset; - local->fd = fd; + local->op = FTRUNCATE; + local->offset = offset; + local->fd = fd_ref (fd); + if (xdata) + local->xdata = dict_ref (xdata); - frame->local = local; + frame->local = local; - STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd); - return 0; + STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; unwind: - STACK_UNWIND (frame, -1, ENOMEM, NULL); + gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, " + "error: %s", -1, strerror (ENOMEM)); + STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } +int +pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd) +{ + posix_lock_t *l = NULL; + int found = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + found = 1; + break; + } + } + + } + pthread_mutex_unlock (&pl_inode->mutex); + return found; +} + +static void +delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd) +{ + posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + if (l->blocked) { + list_move_tail (&l->list, &blocked_list); + continue; + } + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } + + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (l, tmp, &blocked_list, list) { + list_del_init(&l->list); + STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock, + NULL); + __destroy_lock (l); + } + + grant_blocked_locks (this, pl_inode); + + do_blocked_rw (pl_inode); + +} static void __delete_locks_of_owner (pl_inode_t *pl_inode, - transport_t *transport, pid_t pid) + client_t *client, gf_lkowner_t *owner) { - posix_lock_t *tmp = NULL; - posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; + + /* TODO: what if it is a blocked lock with pending l->frame */ + + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if (l->blocked) + continue; + if ((l->client == client) && + is_same_lkowner (&l->owner, owner)) { + gf_log ("posix-locks", GF_LOG_TRACE, + " Flushing lock" + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", + l->client_pid, + lkowner_utoa (&l->owner), + l->user_flock.l_start, + l->user_flock.l_len, + l->blocked == 1 ? "Blocked" : "Active"); + + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } - /* TODO: what if it is a blocked lock with pending l->frame */ + return; +} - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if ((l->transport == transport) - && (l->client_pid == pid)) { - __delete_lock (pl_inode, l); - __destroy_lock (l); - } - } - list_for_each_entry_safe (l, tmp, &pl_inode->int_list, list) { - if ((l->transport == transport) - && (l->client_pid == pid)) { - __delete_lock (pl_inode, l); - __destroy_lock (l); - } - } +int32_t +pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; - return; +} + +int32_t +pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int32_t op_errno = EINVAL; + int op_ret = -1; + int32_t bcount = 0; + int32_t gcount = 0; + char key[PATH_MAX] = {0, }; + char *lk_summary = NULL; + pl_inode_t *pl_inode = NULL; + dict_t *dict = NULL; + clrlk_args args = {0,}; + char *brickname = NULL; + + if (!name) + goto usual; + + if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD))) + goto usual; + + if (clrlk_parse_args (name, &args)) { + op_errno = EINVAL; + goto out; + } + + dict = dict_new (); + if (!dict) { + op_errno = ENOMEM; + goto out; + } + + pl_inode = pl_inode_get (this, loc->inode); + if (!pl_inode) { + op_errno = ENOMEM; + goto out; + } + + switch (args.type) { + case CLRLK_INODE: + case CLRLK_ENTRY: + op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode, + &args, &bcount, + &gcount, + &op_errno); + if (op_ret) + goto out; + break; + case CLRLK_POSIX: + op_ret = clrlk_clear_posixlk (this, pl_inode, &args, + &bcount, &gcount, + &op_errno); + if (op_ret) + goto out; + break; + case CLRLK_TYPE_MAX: + op_errno = EINVAL; + goto out; + } + + op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname); + if (op_ret) { + gf_log (this->name, GF_LOG_WARNING, + "Couldn't get brickname"); + } else { + op_ret = format_brickname(brickname); + if (op_ret) { + gf_log (this->name, GF_LOG_WARNING, + "Couldn't format brickname"); + GF_FREE(brickname); + brickname = NULL; + } + } + + if (!gcount && !bcount) { + if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d " + "granted locks=%d", + (brickname == NULL)? this->name : brickname, + (args.type == CLRLK_INODE)? "inode": + (args.type == CLRLK_ENTRY)? "entry": + (args.type == CLRLK_POSIX)? "posix": " ", + bcount, gcount) == -1) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + strncpy (key, name, strlen (name)); + if (dict_set_dynstr (dict, key, lk_summary)) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + op_ret = 0; +out: + GF_FREE(brickname); + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + + GF_FREE (args.opts); + if (op_ret && lk_summary) + GF_FREE (lk_summary); + if (dict) + dict_unref (dict); + return 0; + +usual: + STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; +} + +static int +format_brickname(char *brickname) +{ + int ret = -1; + char *hostname = NULL; + char *volume = NULL; + char *saveptr = NULL; + + if (!brickname) + goto out; + + strtok_r(brickname, ":", &saveptr); + hostname = gf_strdup(strtok_r(NULL, ":", &saveptr)); + if (hostname == NULL) + goto out; + volume = gf_strdup(strtok_r(NULL, ".", &saveptr)); + if (volume == NULL) + goto out; + + sprintf(brickname, "%s:%s", hostname, volume); + + ret = 0; +out: + GF_FREE(hostname); + GF_FREE(volume); + return ret; +} + +static int +fetch_pathinfo (xlator_t *this, inode_t *inode, int32_t *op_errno, + char **brickname) +{ + int ret = -1; + loc_t loc = {0, }; + dict_t *dict = NULL; + + if (!brickname) + goto out; + + if (!op_errno) + goto out; + + uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref (inode); + + ret = syncop_getxattr (FIRST_CHILD(this), &loc, &dict, + GF_XATTR_PATHINFO_KEY); + if (ret < 0) { + *op_errno = errno; + goto out; + } + + ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, brickname); + if (ret) + goto out; + + *brickname = gf_strdup(*brickname); + if (*brickname == NULL) { + ret = -1; + goto out; + } + + ret = 0; +out: + if (dict != NULL) { + dict_unref (dict); + } + loc_wipe(&loc); + + return ret; } int +pl_lockinfo_get_brickname (xlator_t *this, inode_t *inode, int32_t *op_errno) +{ + int ret = -1; + posix_locks_private_t *priv = NULL; + char *brickname = NULL; + char *end = NULL; + char *tmp = NULL; + + priv = this->private; + + ret = fetch_pathinfo (this, inode, op_errno, &brickname); + if (ret) + goto out; + + end = strrchr (brickname, ':'); + if (!end) { + GF_FREE(brickname); + ret = -1; + goto out; + } + + tmp = brickname; + brickname = gf_strndup (brickname, (end - brickname)); + if (brickname == NULL) { + ret = -1; + goto out; + } + + priv->brickname = brickname; + ret = 0; +out: + GF_FREE(tmp); + return ret; +} + +char * +pl_lockinfo_key (xlator_t *this, inode_t *inode, int32_t *op_errno) +{ + posix_locks_private_t *priv = NULL; + char *key = NULL; + int ret = 0; + + priv = this->private; + + if (priv->brickname == NULL) { + ret = pl_lockinfo_get_brickname (this, inode, op_errno); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "cannot get brickname"); + goto out; + } + } + + key = priv->brickname; +out: + return key; +} + +int32_t +pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd, + dict_t *dict, int32_t *op_errno) +{ + pl_inode_t *pl_inode = NULL; + char *key = NULL, *buf = NULL; + int32_t op_ret = 0; + unsigned long fdnum = 0, len = 0; + dict_t *tmp = NULL; + + pl_inode = pl_inode_get (this, fd->inode); + + if (!pl_inode) { + gf_log (this->name, GF_LOG_DEBUG, "Could not get inode."); + *op_errno = EBADFD; + op_ret = -1; + goto out; + } + + if (!pl_locks_by_fd (pl_inode, fd)) { + op_ret = 0; + goto out; + } + + fdnum = fd_to_fdnum (fd); + + key = pl_lockinfo_key (this, fd->inode, op_errno); + if (key == NULL) { + op_ret = -1; + goto out; + } + + tmp = dict_new (); + if (tmp == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = dict_set_uint64 (tmp, key, fdnum); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value " + "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)", + fdnum, fd, uuid_utoa (fd->inode->gfid), + strerror (*op_errno)); + goto out; + } + + len = dict_serialized_length (tmp); + if (len < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_WARNING, + "dict_serialized_length failed (%s) while handling " + "lockinfo for fd (ptr:%p inode-gfid:%s)", + strerror (*op_errno), fd, uuid_utoa (fd->inode->gfid)); + goto out; + } + + buf = GF_CALLOC (1, len, gf_common_mt_char); + if (buf == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = dict_serialize (tmp, buf); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_WARNING, + "dict_serialize failed (%s) while handling lockinfo " + "for fd (ptr: %p inode-gfid:%s)", strerror (*op_errno), + fd, uuid_utoa (fd->inode->gfid)); + goto out; + } + + op_ret = dict_set_dynptr (dict, GF_XATTR_LOCKINFO_KEY, buf, len); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value " + "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)", + fdnum, fd, uuid_utoa (fd->inode->gfid), + strerror (*op_errno)); + goto out; + } + + buf = NULL; +out: + if (tmp != NULL) { + dict_unref (tmp); + } + + if (buf != NULL) { + GF_FREE (buf); + } + + return op_ret; +} + + +int32_t +pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + int32_t op_ret = 0, op_errno = 0; + dict_t *dict = NULL; + + if (!name) { + goto usual; + } + + if (strcmp (name, GF_XATTR_LOCKINFO_KEY) == 0) { + dict = dict_new (); + if (dict == NULL) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + op_ret = pl_fgetxattr_handle_lockinfo (this, fd, dict, + &op_errno); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "getting lockinfo on fd (ptr:%p inode-gfid:%s) " + "failed (%s)", fd, uuid_utoa (fd->inode->gfid), + strerror (op_errno)); + } + + goto unwind; + } else { + goto usual; + } + +unwind: + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); + if (dict != NULL) { + dict_unref (dict); + } + + return 0; + +usual: + STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; +} + +int32_t +pl_migrate_locks (call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num, + int32_t *op_errno) +{ + pl_inode_t *pl_inode = NULL; + uint64_t newfd_num = 0; + posix_lock_t *l = NULL; + int32_t op_ret = 0; + + newfd_num = fd_to_fdnum (newfd); + + pl_inode = pl_inode_get (frame->this, newfd->inode); + if (pl_inode == NULL) { + op_ret = -1; + *op_errno = EBADFD; + goto out; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (l->fd_num == oldfd_num) { + l->fd_num = newfd_num; + l->client = frame->root->client; + } + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + op_ret = 0; +out: + return op_ret; +} + +int32_t +pl_fsetxattr_handle_lockinfo (call_frame_t *frame, fd_t *fd, char *lockinfo_buf, + int len, int32_t *op_errno) +{ + int32_t op_ret = -1; + dict_t *lockinfo = NULL; + uint64_t oldfd_num = 0; + char *key = NULL; + + lockinfo = dict_new (); + if (lockinfo == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = dict_unserialize (lockinfo_buf, len, &lockinfo); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + goto out; + } + + key = pl_lockinfo_key (frame->this, fd->inode, op_errno); + if (key == NULL) { + op_ret = -1; + goto out; + } + + op_ret = dict_get_uint64 (lockinfo, key, &oldfd_num); + + if (oldfd_num == 0) { + op_ret = 0; + goto out; + } + + op_ret = pl_migrate_locks (frame, fd, oldfd_num, op_errno); + if (op_ret < 0) { + gf_log (frame->this->name, GF_LOG_WARNING, + "migration of locks from oldfd (ptr:%p) to newfd " + "(ptr:%p) (inode-gfid:%s)", (void *)oldfd_num, fd, + uuid_utoa (fd->inode->gfid)); + goto out; + } + +out: + dict_unref (lockinfo); + + return op_ret; +} + +int32_t +pl_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + int32_t op_ret = 0, op_errno = 0; + void *lockinfo_buf = NULL; + int len = 0; + + op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY, + &lockinfo_buf, &len); + if (lockinfo_buf == NULL) { + goto usual; + } + + op_ret = pl_fsetxattr_handle_lockinfo (frame, fd, lockinfo_buf, len, + &op_errno); + if (op_ret < 0) { + goto unwind; + } + +usual: + STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int32_t +pl_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, dict_t *xdata) +{ + pl_fdctx_t *fdctx = NULL; + + if (op_ret < 0) + goto unwind; + + fdctx = pl_check_n_create_fdctx (this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } + +unwind: + STACK_UNWIND_STRICT (opendir, + frame, + op_ret, + op_errno, + fd, xdata); + return 0; +} + +int32_t +pl_opendir (call_frame_t *frame, xlator_t *this, + loc_t *loc, fd_t *fd, dict_t *xdata) +{ + STACK_WIND (frame, + pl_opendir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, + loc, fd, xdata); + return 0; + +} + +int pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND (frame, op_ret, op_errno); + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); - return 0; + return 0; } int pl_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { - posix_locks_private_t *priv = NULL; - pl_inode_t *pl_inode = NULL; + pl_inode_t *pl_inode = NULL; - priv = this->private; + pl_inode = pl_inode_get (this, fd->inode); - pl_inode = pl_inode_get (this, fd->inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_DEBUG, "Could not get inode."); + STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL); + return 0; + } - if (!pl_inode) { - gf_log (this->name, GF_LOG_DEBUG, "Could not get inode."); - STACK_UNWIND (frame, -1, EBADFD); - return 0; - } + pl_trace_flush (this, frame, fd); - pthread_mutex_lock (&pl_inode->mutex); - { - __delete_locks_of_owner (pl_inode, frame->root->trans, - frame->root->pid); - } - pthread_mutex_unlock (&pl_inode->mutex); + if (frame->root->lk_owner.len == 0) { + /* Handle special case when protocol/server sets lk-owner to zero. + * This usually happens due to a client disconnection. Hence, free + * all locks opened with this fd. + */ + gf_log (this->name, GF_LOG_TRACE, + "Releasing all locks with fd %p", fd); + delete_locks_of_fd (this, pl_inode, fd); + goto wind; - grant_blocked_locks (this, pl_inode, GF_LOCK_POSIX); - grant_blocked_locks (this, pl_inode, GF_LOCK_INTERNAL); + } + pthread_mutex_lock (&pl_inode->mutex); + { + __delete_locks_of_owner (pl_inode, frame->root->client, + &frame->root->lk_owner); + } + pthread_mutex_unlock (&pl_inode->mutex); - do_blocked_rw (pl_inode); + grant_blocked_locks (this, pl_inode); - STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd); - return 0; + do_blocked_rw (pl_inode); + +wind: + STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; } int pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - STACK_UNWIND (frame, op_ret, op_errno, fd); + pl_fdctx_t *fdctx = NULL; + + if (op_ret < 0) + goto unwind; + + fdctx = pl_check_n_create_fdctx (this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } + +unwind: + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); - return 0; + return 0; } int -pl_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, fd_t *fd) +pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - /* why isn't O_TRUNC being handled ? */ - STACK_WIND (frame, pl_open_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, - loc, flags & ~O_TRUNC, fd); + STACK_WIND (frame, pl_open_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, + loc, flags, fd, xdata); - return 0; + return 0; } int pl_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - fd_t *fd, inode_t *inode, struct stat *buf) + xlator_t *this, int32_t op_ret, int32_t op_errno, + fd_t *fd, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + pl_fdctx_t *fdctx = NULL; - return 0; + if (op_ret < 0) + goto unwind; + + fdctx = pl_check_n_create_fdctx (this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } + +unwind: + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + + return 0; } int pl_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) + loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) { - STACK_WIND (frame, pl_create_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, - loc, flags, mode, fd); - return 0; + STACK_WIND (frame, pl_create_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, + loc, flags, mode, umask, fd, xdata); + return 0; } int pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, struct stat *stbuf, - struct iobref *iobref) + int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, struct iatt *stbuf, + struct iobref *iobref, dict_t *xdata) { - STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref); + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, + vector, count, stbuf, iobref, xdata); - return 0; + return 0; } int -pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *stbuf) +pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - STACK_UNWIND (frame, op_ret, op_errno, stbuf); + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - return 0; + return 0; } void do_blocked_rw (pl_inode_t *pl_inode) { - struct list_head wind_list; - pl_rw_req_t *rw = NULL; - pl_rw_req_t *tmp = NULL; - - INIT_LIST_HEAD (&wind_list); - - pthread_mutex_lock (&pl_inode->mutex); - { - list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) { - if (__rw_allowable (pl_inode, &rw->region, - rw->stub->fop)) { - list_del_init (&rw->list); - list_add_tail (&rw->list, &wind_list); - } - } - } - pthread_mutex_unlock (&pl_inode->mutex); + struct list_head wind_list; + pl_rw_req_t *rw = NULL; + pl_rw_req_t *tmp = NULL; + + INIT_LIST_HEAD (&wind_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) { + if (__rw_allowable (pl_inode, &rw->region, + rw->stub->fop)) { + list_del_init (&rw->list); + list_add_tail (&rw->list, &wind_list); + } + } + } + pthread_mutex_unlock (&pl_inode->mutex); - list_for_each_entry_safe (rw, tmp, &wind_list, list) { - list_del_init (&rw->list); - call_resume (rw->stub); - free (rw); - } + list_for_each_entry_safe (rw, tmp, &wind_list, list) { + list_del_init (&rw->list); + call_resume (rw->stub); + GF_FREE (rw); + } - return; + return; } static int __rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region, - glusterfs_fop_t op) + glusterfs_fop_t op) { - posix_lock_t *l = NULL; - int ret = 1; - - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (locks_overlap (l, region) && !same_owner (l, region)) { - if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK)) - continue; - ret = 0; - break; - } - } + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (locks_overlap (l, region) && !same_owner (l, region)) { + if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK)) + continue; + ret = 0; + break; + } + } - return ret; + return ret; } int -pl_readv_cont (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset) +pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - STACK_WIND (frame, pl_readv_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, - fd, size, offset); + STACK_WIND (frame, pl_readv_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, + fd, size, offset, flags, xdata); - return 0; + return 0; } int pl_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset) -{ - posix_locks_private_t *priv = NULL; - pl_inode_t *pl_inode = NULL; - pl_rw_req_t *rw = NULL; - posix_lock_t region = {.list = {0, }, }; - int op_ret = 0; - int op_errno = 0; - char wind_needed = 1; - - - priv = this->private; - pl_inode = pl_inode_get (this, fd->inode); - - if (priv->mandatory && pl_inode->mandatory) { - region.fl_start = offset; - region.fl_end = offset + size - 1; - region.transport = frame->root->trans; - region.client_pid = frame->root->pid; - - pthread_mutex_lock (&pl_inode->mutex); - { - wind_needed = __rw_allowable (pl_inode, ®ion, + fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) +{ + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = {.list = {0, }, }; + int op_ret = 0; + int op_errno = 0; + char wind_needed = 1; + + + priv = this->private; + pl_inode = pl_inode_get (this, fd->inode); + + if (priv->mandatory && pl_inode->mandatory) { + region.fl_start = offset; + region.fl_end = offset + size - 1; + region.client = frame->root->client; + region.fd_num = fd_to_fdnum(fd); + region.client_pid = frame->root->pid; + region.owner = frame->root->lk_owner; + + pthread_mutex_lock (&pl_inode->mutex); + { + wind_needed = __rw_allowable (pl_inode, ®ion, GF_FOP_READ); - if (wind_needed) { - goto unlock; + if (wind_needed) { + goto unlock; + } + + if (fd->flags & O_NONBLOCK) { + gf_log (this->name, GF_LOG_TRACE, + "returning EAGAIN as fd is O_NONBLOCK"); + op_errno = EAGAIN; + op_ret = -1; + goto unlock; + } + + rw = GF_CALLOC (1, sizeof (*rw), + gf_locks_mt_pl_rw_req_t); + if (!rw) { + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } + + rw->stub = fop_readv_stub (frame, pl_readv_cont, + fd, size, offset, flags, + xdata); + if (!rw->stub) { + op_errno = ENOMEM; + op_ret = -1; + GF_FREE (rw); + goto unlock; } - if (fd->flags & O_NONBLOCK) { - gf_log (this->name, GF_LOG_TRACE, - "returning EAGAIN as fd is O_NONBLOCK"); - op_errno = EAGAIN; - op_ret = -1; - goto unlock; - } - - rw = CALLOC (1, sizeof (*rw)); - if (!rw) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - op_ret = -1; - goto unlock; - } - - rw->stub = fop_readv_stub (frame, pl_readv_cont, - fd, size, offset); - if (!rw->stub) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - op_ret = -1; - free (rw); - goto unlock; - } - - rw->region = region; - - list_add_tail (&rw->list, &pl_inode->rw_list); - } - unlock: - pthread_mutex_unlock (&pl_inode->mutex); - } + rw->region = region; + + list_add_tail (&rw->list, &pl_inode->rw_list); + } + unlock: + pthread_mutex_unlock (&pl_inode->mutex); + } if (wind_needed) { - STACK_WIND (frame, pl_readv_cbk, + STACK_WIND (frame, pl_readv_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, - fd, size, offset); + fd, size, offset, flags, xdata); } - if (op_ret == -1) - STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL); + if (op_ret == -1) + STACK_UNWIND_STRICT (readv, frame, -1, op_errno, + NULL, 0, NULL, NULL, NULL); - return 0; + return 0; } int pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int count, off_t offset, - struct iobref *iobref) + struct iovec *vector, int count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { - STACK_WIND (frame, pl_writev_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, - fd, vector, count, offset, iobref); + STACK_WIND (frame, pl_writev_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, + fd, vector, count, offset, flags, iobref, xdata); - return 0; + return 0; } int pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) -{ - posix_locks_private_t *priv = NULL; - pl_inode_t *pl_inode = NULL; - pl_rw_req_t *rw = NULL; - posix_lock_t region = {.list = {0, }, }; - int op_ret = 0; - int op_errno = 0; - char wind_needed = 1; - - - priv = this->private; - pl_inode = pl_inode_get (this, fd->inode); - - if (priv->mandatory && pl_inode->mandatory) { - region.fl_start = offset; - region.fl_end = offset + iov_length (vector, count) - 1; - region.transport = frame->root->trans; - region.client_pid = frame->root->pid; - - pthread_mutex_lock (&pl_inode->mutex); - { - wind_needed = __rw_allowable (pl_inode, ®ion, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) +{ + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = {.list = {0, }, }; + int op_ret = 0; + int op_errno = 0; + char wind_needed = 1; + + priv = this->private; + pl_inode = pl_inode_get (this, fd->inode); + + if (priv->mandatory && pl_inode->mandatory) { + region.fl_start = offset; + region.fl_end = offset + iov_length (vector, count) - 1; + region.client = frame->root->client; + region.fd_num = fd_to_fdnum(fd); + region.client_pid = frame->root->pid; + region.owner = frame->root->lk_owner; + + pthread_mutex_lock (&pl_inode->mutex); + { + wind_needed = __rw_allowable (pl_inode, ®ion, GF_FOP_WRITE); - if (wind_needed) - goto unlock; + if (wind_needed) + goto unlock; - if (fd->flags & O_NONBLOCK) { - gf_log (this->name, GF_LOG_TRACE, - "returning EAGAIN because fd is " + if (fd->flags & O_NONBLOCK) { + gf_log (this->name, GF_LOG_TRACE, + "returning EAGAIN because fd is " "O_NONBLOCK"); - op_errno = EAGAIN; - op_ret = -1; - goto unlock; - } - - rw = CALLOC (1, sizeof (*rw)); - if (!rw) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - op_ret = -1; - goto unlock; - } - - rw->stub = fop_writev_stub (frame, pl_writev_cont, - fd, vector, count, offset, - iobref); - if (!rw->stub) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - op_ret = -1; - free (rw); - goto unlock; - } - - rw->region = region; - - list_add_tail (&rw->list, &pl_inode->rw_list); - } - unlock: - pthread_mutex_unlock (&pl_inode->mutex); - } + op_errno = EAGAIN; + op_ret = -1; + goto unlock; + } + + rw = GF_CALLOC (1, sizeof (*rw), + gf_locks_mt_pl_rw_req_t); + if (!rw) { + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } + + rw->stub = fop_writev_stub (frame, pl_writev_cont, + fd, vector, count, offset, + flags, iobref, xdata); + if (!rw->stub) { + op_errno = ENOMEM; + op_ret = -1; + GF_FREE (rw); + goto unlock; + } + + rw->region = region; + + list_add_tail (&rw->list, &pl_inode->rw_list); + } + unlock: + pthread_mutex_unlock (&pl_inode->mutex); + } if (wind_needed) - STACK_WIND (frame, pl_writev_cbk, + STACK_WIND (frame, pl_writev_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, - fd, vector, count, offset, iobref); + fd, vector, count, offset, flags, iobref, xdata); + + if (op_ret == -1) + STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, + NULL); + + return 0; +} + +static int +__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd) +{ + int found = 0; + posix_lock_t *l = NULL; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + found = 1; + break; + } + } + + return found; +} + +static posix_lock_t * +lock_dup (posix_lock_t *lock) +{ + posix_lock_t *new_lock = NULL; + + new_lock = new_posix_lock (&lock->user_flock, lock->client, + lock->client_pid, &lock->owner, + (fd_t *)lock->fd_num); + return new_lock; +} + +static int +__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, + pl_fdctx_t *fdctx) +{ + posix_lock_t *l = NULL; + posix_lock_t *duplock = NULL; + int ret = 0; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if ((l->fd_num == fd_to_fdnum(fd))) { + duplock = lock_dup (l); + if (!duplock) { + ret = -1; + break; + } + + list_add_tail (&duplock->list, &fdctx->locks_list); + } + } + + return ret; +} + +static int +__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, + pl_fdctx_t *fdctx) +{ + int ret = 0; + + ret = __dup_locks_to_fdctx (pl_inode, fd, fdctx); + if (ret) + goto out; - if (op_ret == -1) - STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); +out: + return ret; + +} - return 0; +static void +pl_mark_eol_lock (posix_lock_t *lock) +{ + lock->user_flock.l_type = GF_LK_EOL; + return; } +static posix_lock_t * +__get_next_fdctx_lock (pl_fdctx_t *fdctx) +{ + posix_lock_t *lock = NULL; + + GF_ASSERT (fdctx); + + if (list_empty (&fdctx->locks_list)) { + gf_log (THIS->name, GF_LOG_DEBUG, + "fdctx lock list empty"); + goto out; + } + + lock = list_entry (fdctx->locks_list.next, typeof (*lock), + list); + + GF_ASSERT (lock); + + list_del_init (&lock->list); + +out: + return lock; +} + +static int +__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock) +{ + posix_lock_t *lock = NULL; + int ret = 0; + + GF_ASSERT (fdctx); + + lock = __get_next_fdctx_lock (fdctx); + if (!lock) { + gf_log (THIS->name, GF_LOG_DEBUG, + "marking EOL in reqlock"); + pl_mark_eol_lock (reqlock); + goto out; + } + + reqlock->user_flock = lock->user_flock; + reqlock->fl_start = lock->fl_start; + reqlock->fl_type = lock->fl_type; + reqlock->fl_end = lock->fl_end; + reqlock->owner = lock->owner; + +out: + if (lock) + __destroy_lock (lock); + + return ret; +} + +static int +pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode, + fd_t *fd, posix_lock_t *reqlock) +{ + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + int ret = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (!__fd_has_locks (pl_inode, fd)) { + gf_log (this->name, GF_LOG_DEBUG, + "fd=%p has no active locks", fd); + ret = 0; + goto unlock; + } + + gf_log (this->name, GF_LOG_DEBUG, + "There are active locks on fd"); + + ret = fd_ctx_get (fd, this, &tmp); + fdctx = (pl_fdctx_t *)(long) tmp; + + if (list_empty (&fdctx->locks_list)) { + gf_log (this->name, GF_LOG_TRACE, + "no fdctx -> copying all locks on fd"); + + ret = __copy_locks_to_fdctx (pl_inode, fd, fdctx); + if (ret) { + goto unlock; + } + + ret = __set_next_lock_fd (fdctx, reqlock); + + } else { + gf_log (this->name, GF_LOG_TRACE, + "fdctx present -> returning the next lock"); + ret = __set_next_lock_fd (fdctx, reqlock); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "could not get next lock of fd"); + goto unlock; + } + } + } + +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + return ret; + +} int pl_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct flock *flock) -{ - transport_t *transport = NULL; - pid_t client_pid = 0; - posix_locks_private_t *priv = NULL; - pl_inode_t *pl_inode = NULL; - int op_ret = 0; - int op_errno = 0; - int can_block = 0; - posix_lock_t *reqlock = NULL; - posix_lock_t *conf = NULL; - int ret = 0; - - transport = frame->root->trans; - client_pid = frame->root->pid; - priv = this->private; - - pl_inode = pl_inode_get (this, fd->inode); - if (!pl_inode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - reqlock = new_posix_lock (flock, transport, client_pid); - if (!reqlock) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - switch (cmd) { + fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) +{ + pl_inode_t *pl_inode = NULL; + int op_ret = 0; + int op_errno = 0; + int can_block = 0; + posix_lock_t *reqlock = NULL; + posix_lock_t *conf = NULL; + int ret = 0; + + if ((flock->l_start < 0) || (flock->l_len < 0)) { + op_ret = -1; + op_errno = EINVAL; + goto unwind; + } + + pl_inode = pl_inode_get (this, fd->inode); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid, + &frame->root->lk_owner, fd); + + if (!reqlock) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + pl_trace_in (this, frame, fd, NULL, cmd, flock, NULL); + + switch (cmd) { + + case F_RESLK_LCKW: + can_block = 1; + + /* fall through */ + case F_RESLK_LCK: + memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); + reqlock->frame = frame; + reqlock->this = this; + + ret = pl_reserve_setlk (this, pl_inode, reqlock, + can_block); + if (ret < 0) { + if (can_block) + goto out; + + op_ret = -1; + op_errno = -ret; + __destroy_lock (reqlock); + goto unwind; + } + /* Finally a getlk and return the call */ + conf = pl_getlk (pl_inode, reqlock); + if (conf) + posix_lock_to_flock (conf, flock); + break; + + case F_RESLK_UNLCK: + reqlock->frame = frame; + reqlock->this = this; + ret = pl_reserve_unlock (this, pl_inode, reqlock); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + } + __destroy_lock (reqlock); + goto unwind; + + break; + + case F_GETLK_FD: + reqlock->frame = frame; + reqlock->this = this; + ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); + GF_ASSERT (ret >= 0); + + ret = pl_getlk_fd (this, pl_inode, fd, reqlock); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "getting locks on fd failed"); + op_ret = -1; + op_errno = ENOLCK; + goto unwind; + } + + gf_log (this->name, GF_LOG_TRACE, + "Replying with a lock on fd for healing"); + + posix_lock_to_flock (reqlock, flock); + __destroy_lock (reqlock); + + break; #if F_GETLK != F_GETLK64 - case F_GETLK64: + case F_GETLK64: #endif - case F_GETLK: - conf = pl_getlk (pl_inode, reqlock, GF_LOCK_POSIX); - posix_lock_to_flock (conf, flock); - __destroy_lock (reqlock); + case F_GETLK: + conf = pl_getlk (pl_inode, reqlock); + posix_lock_to_flock (conf, flock); + __destroy_lock (reqlock); - break; + break; #if F_SETLKW != F_SETLKW64 - case F_SETLKW64: + case F_SETLKW64: #endif - case F_SETLKW: - can_block = 1; - reqlock->frame = frame; - reqlock->this = this; - reqlock->fd = fd; + case F_SETLKW: + can_block = 1; + reqlock->frame = frame; + reqlock->this = this; - /* fall through */ + /* fall through */ #if F_SETLK != F_SETLK64 - case F_SETLK64: + case F_SETLK64: #endif - case F_SETLK: - memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); - ret = pl_setlk (this, pl_inode, reqlock, - can_block, GF_LOCK_POSIX); - - if (ret == -1) { - if (can_block) - goto out; - - gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); - op_ret = -1; - op_errno = EAGAIN; - __destroy_lock (reqlock); - } - } + case F_SETLK: + memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); + ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); + if (ret < 0) { + gf_log (this->name, GF_LOG_TRACE, + "Lock blocked due to conflicting reserve lock"); + goto out; + } + ret = pl_setlk (this, pl_inode, reqlock, + can_block); + + if (ret == -1) { + if ((can_block) && (F_UNLCK != flock->l_type)) { + pl_trace_block (this, frame, fd, NULL, cmd, flock, NULL); + goto out; + } + gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); + op_ret = -1; + op_errno = EAGAIN; + __destroy_lock (reqlock); + + } else if ((0 == ret) && (F_UNLCK == flock->l_type)) { + /* For NLM's last "unlock on fd" detection */ + if (pl_locks_by_fd (pl_inode, fd)) + flock->l_type = F_RDLCK; + else + flock->l_type = F_UNLCK; + } + } unwind: - STACK_UNWIND (frame, op_ret, op_errno, flock); + pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL); + pl_update_refkeeper (this, fd->inode); + + + STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata); out: - return 0; + return 0; } /* TODO: this function just logs, no action required?? */ int pl_forget (xlator_t *this, - inode_t *inode) + inode_t *inode) { - pl_inode_t *pl_inode = NULL; - + pl_inode_t *pl_inode = NULL; + posix_lock_t *ext_tmp = NULL; posix_lock_t *ext_l = NULL; + struct list_head posixlks_released; - posix_lock_t *int_tmp = NULL; - posix_lock_t *int_l = NULL; + pl_inode_lock_t *ino_tmp = NULL; + pl_inode_lock_t *ino_l = NULL; + struct list_head inodelks_released; pl_rw_req_t *rw_tmp = NULL; pl_rw_req_t *rw_req = NULL; pl_entry_lock_t *entry_tmp = NULL; pl_entry_lock_t *entry_l = NULL; + struct list_head entrylks_released; + + pl_dom_list_t *dom = NULL; + pl_dom_list_t *dom_tmp = NULL; + + INIT_LIST_HEAD (&posixlks_released); + INIT_LIST_HEAD (&inodelks_released); + INIT_LIST_HEAD (&entrylks_released); + + pl_inode = pl_inode_get (this, inode); + + pthread_mutex_lock (&pl_inode->mutex); + { - pl_inode = pl_inode_get (this, inode); + if (!list_empty (&pl_inode->rw_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending R/W requests found, releasing."); - if (!list_empty (&pl_inode->rw_list)) { - gf_log (this->name, GF_LOG_DEBUG, - "Pending R/W requests found, releasing."); - - list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list, - list) { - - list_del (&rw_req->list); - FREE (rw_req); + list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list, + list) { + + list_del (&rw_req->list); + GF_FREE (rw_req); + } } - } - if (!list_empty (&pl_inode->ext_list)) { - gf_log (this->name, GF_LOG_DEBUG, - "Pending fcntl locks found, releasing."); + if (!list_empty (&pl_inode->ext_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending fcntl locks found, releasing."); + + list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list, + list) { - list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list, - list) { - - __delete_lock (pl_inode, ext_l); - __destroy_lock (ext_l); + __delete_lock (pl_inode, ext_l); + if (ext_l->blocked) { + list_add_tail (&ext_l->list, &posixlks_released); + continue; + } + __destroy_lock (ext_l); + } } - } - if (!list_empty (&pl_inode->int_list)) { - gf_log (this->name, GF_LOG_DEBUG, - "Pending inode locks found, releasing."); - list_for_each_entry_safe (int_l, int_tmp, &pl_inode->int_list, - list) { - - __delete_lock (pl_inode, int_l); - __destroy_lock (int_l); + list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) { + + if (!list_empty (&dom->inodelk_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending inode locks found, releasing."); + + list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) { + __delete_inode_lock (ino_l); + __pl_inodelk_unref (ino_l); + } + + list_splice_init (&dom->blocked_inodelks, &inodelks_released); + + + } + if (!list_empty (&dom->entrylk_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending entry locks found, releasing."); + + list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) { + list_del_init (&entry_l->domain_list); + + GF_FREE ((char *)entry_l->basename); + GF_FREE (entry_l->connection_id); + GF_FREE (entry_l); + } + + list_splice_init (&dom->blocked_entrylks, &entrylks_released); + } + + list_del (&dom->inode_list); + gf_log ("posix-locks", GF_LOG_TRACE, + " Cleaning up domain: %s", dom->domain); + GF_FREE ((char *)(dom->domain)); + GF_FREE (dom); } - } - if (!list_empty (&pl_inode->dir_list)) { - gf_log (this->name, GF_LOG_DEBUG, - "Pending entry locks found, releasing."); - - list_for_each_entry_safe (entry_l, entry_tmp, - &pl_inode->dir_list, inode_list) { - - list_del (&entry_l->inode_list); - FREE (entry_l); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) { + + STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0, + &ext_l->user_flock, NULL); + __destroy_lock (ext_l); + } + + list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) { + + STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL); + __pl_inodelk_unref (ino_l); + } + + list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) { + + STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL); + GF_FREE ((char *)entry_l->basename); + GF_FREE (entry_l->connection_id); + GF_FREE (entry_l); + + } + + GF_FREE (pl_inode); + + return 0; +} + +int +pl_release (xlator_t *this, fd_t *fd) +{ + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = -1; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + + if (fd == NULL) { + goto out; + } + + ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode); + if (ret != 0) + goto out; + + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + + pl_trace_release (this, fd); + + gf_log (this->name, GF_LOG_TRACE, + "Releasing all locks with fd %p", fd); + + delete_locks_of_fd (this, pl_inode, fd); + pl_update_refkeeper (this, fd->inode); + + ret = fd_ctx_del (fd, this, &tmp); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Could not get fdctx"); + goto out; + } + + fdctx = (pl_fdctx_t *)(long)tmp; + + GF_FREE (fdctx); +out: + return ret; +} + +int +pl_releasedir (xlator_t *this, fd_t *fd) +{ + int ret = -1; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + + if (fd == NULL) { + goto out; + } + + ret = fd_ctx_del (fd, this, &tmp); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Could not get fdctx"); + goto out; + } + + fdctx = (pl_fdctx_t *)(long)tmp; + + GF_FREE (fdctx); +out: + return ret; +} + +int32_t +__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode) +{ + posix_lock_t *lock = NULL; + int32_t count = 0; + + list_for_each_entry (lock, &pl_inode->ext_list, list) { + + count++; + } + + return count; +} + +int32_t +get_posixlk_count (xlator_t *this, inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = 0; + int32_t count = 0; + + ret = inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret != 0) { + goto out; + } + + pl_inode = (pl_inode_t *)(long) tmp_pl_inode; + + pthread_mutex_lock (&pl_inode->mutex); + { + count =__get_posixlk_count (this, pl_inode); + } + pthread_mutex_unlock (&pl_inode->mutex); + +out: + return count; +} + +void +pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent, + char *basename, dict_t *dict) +{ + uint32_t entrylk = 0; + int ret = -1; + + if (!parent || !basename || !strlen (basename)) + goto out; + entrylk = check_entrylk_on_basename (this, parent, basename); +out: + ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK); + } +} + +void +pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode, + dict_t *dict) +{ + int32_t count = 0; + int ret = -1; + + count = get_entrylk_count (this, inode); + ret = dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, count); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + " dict_set failed on key %s", GLUSTERFS_ENTRYLK_COUNT); + } + +} + +void +pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict, + gf_boolean_t per_dom) +{ + int32_t count = 0; + int ret = -1; + char *domname = NULL; + + + if (per_dom){ + ret = dict_get_str (dict, GLUSTERFS_INODELK_DOM_COUNT, + &domname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "value for key %s",GLUSTERFS_INODELK_DOM_COUNT); + goto out; } - } + } + + count = get_inodelk_count (this, inode, domname); - FREE (pl_inode); + ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to set count for " + "key %s", GLUSTERFS_INODELK_COUNT); + } - return 0; +out: + return; +} + +void +pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode, + dict_t *dict) +{ + int32_t count = 0; + int ret = -1; + + count = get_posixlk_count (this, inode); + ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, count); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + " dict_set failed on key %s", GLUSTERFS_POSIXLK_COUNT); + } + +} + +int32_t +pl_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct iatt *buf, + dict_t *xdata, + struct iatt *postparent) +{ + pl_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO (this->name, frame->local, out); + + if (op_ret) + goto out; + + local = frame->local; + + if (local->parent_entrylk_req) + pl_parent_entrylk_xattr_fill (this, local->loc.parent, + (char*)local->loc.name, xdata); + if (local->entrylk_count_req) + pl_entrylk_xattr_fill (this, inode, xdata); + if (local->inodelk_count_req) + pl_inodelk_xattr_fill (this, inode, xdata, _gf_false); + if (local->inodelk_dom_count_req) + pl_inodelk_xattr_fill (this, inode, xdata, _gf_true); + if (local->posixlk_count_req) + pl_posixlk_xattr_fill (this, inode, xdata); + + +out: + local = frame->local; + frame->local = NULL; + + if (local != NULL) { + loc_wipe (&local->loc); + mem_put (local); + } + + STACK_UNWIND_STRICT ( + lookup, + frame, + op_ret, + op_errno, + inode, + buf, + xdata, + postparent); + return 0; +} + +int32_t +pl_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xdata) +{ + pl_local_t *local = NULL; + int ret = -1; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + local = mem_get0 (this->local_pool); + GF_VALIDATE_OR_GOTO (this->name, local, out); + + if (xdata) { + if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT)) + local->entrylk_count_req = 1; + if (dict_get (xdata, GLUSTERFS_INODELK_COUNT)) + local->inodelk_count_req = 1; + if (dict_get (xdata, GLUSTERFS_INODELK_DOM_COUNT)) + local->inodelk_dom_count_req = 1; + if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT)) + local->posixlk_count_req = 1; + if (dict_get (xdata, GLUSTERFS_PARENT_ENTRYLK)) + local->parent_entrylk_req = 1; + } + + frame->local = local; + loc_copy (&local->loc, loc); + + STACK_WIND (frame, + pl_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, xdata); + ret = 0; +out: + if (ret == -1) + STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, + NULL, NULL, NULL); + + return 0; +} +int +pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + pl_local_t *local = NULL; + gf_dirent_t *entry = NULL; + + local = frame->local; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry (entry, &entries->list, list) { + if (local->entrylk_count_req) + pl_entrylk_xattr_fill (this, entry->inode, entry->dict); + if (local->inodelk_count_req) + pl_inodelk_xattr_fill (this, entry->inode, entry->dict, + _gf_false); + if (local->inodelk_dom_count_req) + pl_inodelk_xattr_fill (this, entry->inode, entry->dict, + _gf_true); + if (local->posixlk_count_req) + pl_posixlk_xattr_fill (this, entry->inode, entry->dict); + } + +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + + if (local) + mem_put (local); + + return 0; +} + +int +pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + pl_local_t *local = NULL; + + local = mem_get0 (this->local_pool); + GF_VALIDATE_OR_GOTO (this->name, local, out); + + if (dict) { + if (dict_get (dict, GLUSTERFS_ENTRYLK_COUNT)) + local->entrylk_count_req = 1; + if (dict_get (dict, GLUSTERFS_INODELK_COUNT)) + local->inodelk_count_req = 1; + if (dict_get (dict, GLUSTERFS_INODELK_DOM_COUNT)) + local->inodelk_dom_count_req = 1; + if (dict_get (dict, GLUSTERFS_POSIXLK_COUNT)) + local->posixlk_count_req = 1; + } + + frame->local = local; + + STACK_WIND (frame, pl_readdirp_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + fd, size, offset, dict); + + return 0; +out: + STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; } void -pl_dump_inode_priv (inode_t *inode) +pl_dump_lock (char *str, int size, struct gf_flock *flock, + gf_lkowner_t *owner, void *trans, char *conn_id, + time_t *granted_time, time_t *blkd_time, gf_boolean_t active) { + char *type_str = NULL; + char granted[32] = {0,}; + char blocked[32] = {0,}; + + switch (flock->l_type) { + case F_RDLCK: + type_str = "READ"; + break; + case F_WRLCK: + type_str = "WRITE"; + break; + case F_UNLCK: + type_str = "UNLOCK"; + break; + default: + type_str = "UNKNOWN"; + break; + } - int ret = -1; - uint64_t tmp_pl_inode = 0; - pl_inode_t *pl_inode = NULL; - char key[GF_DUMP_MAX_BUF_LEN]; + if (active) { + if (blkd_time && *blkd_time == 0) { + snprintf (str, size, RANGE_GRNTD_FMT, + type_str, flock->l_whence, + (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid, + lkowner_utoa (owner), trans, conn_id, + ctime_r (granted_time, granted)); + } else { + snprintf (str, size, RANGE_BLKD_GRNTD_FMT, + type_str, flock->l_whence, + (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid, + lkowner_utoa (owner), trans, conn_id, + ctime_r (blkd_time, blocked), + ctime_r (granted_time, granted)); + } + } + else { + snprintf (str, size, RANGE_BLKD_FMT, + type_str, flock->l_whence, + (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid, + lkowner_utoa (owner), trans, conn_id, + ctime_r (blkd_time, blocked)); + } + +} + +void +__dump_entrylks (pl_inode_t *pl_inode) +{ + pl_dom_list_t *dom = NULL; + pl_entry_lock_t *lock = NULL; + char blocked[32] = {0,}; + char granted[32] = {0,}; + int count = 0; + char key[GF_DUMP_MAX_BUF_LEN] = {0,}; + + char tmp[256]; + + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + + count = 0; + + gf_proc_dump_build_key(key, + "lock-dump.domain", + "domain"); + gf_proc_dump_write(key, "%s", dom->domain); + + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + + gf_proc_dump_build_key(key, + "xlator.feature.locks.lock-dump.domain.entrylk", + "entrylk[%d](ACTIVE)", count ); + if (lock->blkd_time.tv_sec == 0 && lock->blkd_time.tv_usec == 0) { + snprintf (tmp, 256, ENTRY_GRNTD_FMT, + lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : + "ENTRYLK_WRLCK", lock->basename, + (unsigned long long) lock->client_pid, + lkowner_utoa (&lock->owner), lock->trans, + lock->connection_id, + ctime_r (&lock->granted_time.tv_sec, granted)); + } else { + snprintf (tmp, 256, ENTRY_BLKD_GRNTD_FMT, + lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : + "ENTRYLK_WRLCK", lock->basename, + (unsigned long long) lock->client_pid, + lkowner_utoa (&lock->owner), lock->trans, + lock->connection_id, + ctime_r (&lock->blkd_time.tv_sec, blocked), + ctime_r (&lock->granted_time.tv_sec, granted)); + } + + gf_proc_dump_write(key, tmp); + + count++; + } + + list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + + gf_proc_dump_build_key(key, + "xlator.feature.locks.lock-dump.domain.entrylk", + "entrylk[%d](BLOCKED)", count ); + snprintf (tmp, 256, ENTRY_BLKD_FMT, + lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : + "ENTRYLK_WRLCK", lock->basename, + (unsigned long long) lock->client_pid, + lkowner_utoa (&lock->owner), lock->trans, + lock->connection_id, + ctime_r (&lock->blkd_time.tv_sec, blocked)); + + gf_proc_dump_write(key, tmp); + + count++; + } + + } + +} + +void +dump_entrylks (pl_inode_t *pl_inode) +{ + pthread_mutex_lock (&pl_inode->mutex); + { + __dump_entrylks (pl_inode); + } + pthread_mutex_unlock (&pl_inode->mutex); + +} + +void +__dump_inodelks (pl_inode_t *pl_inode) +{ + pl_dom_list_t *dom = NULL; + pl_inode_lock_t *lock = NULL; + int count = 0; + char key[GF_DUMP_MAX_BUF_LEN]; + + char tmp[256]; + + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + + count = 0; + + gf_proc_dump_build_key(key, + "lock-dump.domain", + "domain"); + gf_proc_dump_write(key, "%s", dom->domain); + + list_for_each_entry (lock, &dom->inodelk_list, list) { + + gf_proc_dump_build_key(key, + "inodelk", + "inodelk[%d](ACTIVE)",count ); + + SET_FLOCK_PID (&lock->user_flock, lock); + pl_dump_lock (tmp, 256, &lock->user_flock, + &lock->owner, + lock->client, lock->connection_id, + &lock->granted_time.tv_sec, + &lock->blkd_time.tv_sec, + _gf_true); + gf_proc_dump_write(key, tmp); + + count++; + } + + list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { + + gf_proc_dump_build_key(key, + "inodelk", + "inodelk[%d](BLOCKED)",count ); + SET_FLOCK_PID (&lock->user_flock, lock); + pl_dump_lock (tmp, 256, &lock->user_flock, + &lock->owner, + lock->client, lock->connection_id, + 0, &lock->blkd_time.tv_sec, + _gf_false); + gf_proc_dump_write(key, tmp); + + count++; + } + + } + +} + +void +dump_inodelks (pl_inode_t *pl_inode) +{ + pthread_mutex_lock (&pl_inode->mutex); + { + __dump_inodelks (pl_inode); + } + pthread_mutex_unlock (&pl_inode->mutex); + +} + +void +__dump_posixlks (pl_inode_t *pl_inode) +{ + posix_lock_t *lock = NULL; + int count = 0; + char key[GF_DUMP_MAX_BUF_LEN]; + + char tmp[256]; + + list_for_each_entry (lock, &pl_inode->ext_list, list) { + + SET_FLOCK_PID (&lock->user_flock, lock); + gf_proc_dump_build_key(key, + "posixlk", + "posixlk[%d](%s)", + count, + lock->blocked ? "BLOCKED" : "ACTIVE"); + pl_dump_lock (tmp, 256, &lock->user_flock, + &lock->owner, lock->client, NULL, + &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, + (lock->blocked)? _gf_false: _gf_true); + gf_proc_dump_write(key, tmp); + + count++; + } +} + +void +dump_posixlks (pl_inode_t *pl_inode) +{ + pthread_mutex_lock (&pl_inode->mutex); + { + __dump_posixlks (pl_inode); + } + pthread_mutex_unlock (&pl_inode->mutex); + +} + +int32_t +pl_dump_inode_priv (xlator_t *this, inode_t *inode) +{ + + int ret = -1; + uint64_t tmp_pl_inode = 0; + pl_inode_t *pl_inode = NULL; + char *pathname = NULL; + gf_boolean_t section_added = _gf_false; - if (!inode) - return; + int count = 0; - ret = inode_ctx_get (inode, inode->table->xl, &tmp_pl_inode); + if (!inode) { + errno = EINVAL; + goto out; + } + + ret = TRY_LOCK (&inode->lock); + if (ret) + goto out; + { + ret = __inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret) + goto unlock; + } +unlock: + UNLOCK (&inode->lock); + if (ret) + goto out; - if (ret != 0) - return; - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + if (!pl_inode) { + ret = -1; + goto out; + } + + gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name); + section_added = _gf_true; + + /*We are safe to call __inode_path since we have the + * inode->table->lock */ + __inode_path (inode, NULL, &pathname); + if (pathname) + gf_proc_dump_write ("path", "%s", pathname); + + gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory); + + ret = pthread_mutex_trylock (&pl_inode->mutex); + if (ret) + goto out; + { + count = __get_entrylk_count (this, pl_inode); + if (count) { + gf_proc_dump_write("entrylk-count", "%d", count); + __dump_entrylks (pl_inode); + } + + count = __get_inodelk_count (this, pl_inode, NULL); + if (count) { + gf_proc_dump_write("inodelk-count", "%d", count); + __dump_inodelks (pl_inode); + } - if (!pl_inode) - return; + count = __get_posixlk_count (this, pl_inode); + if (count) { + gf_proc_dump_write("posixlk-count", "%d", count); + __dump_posixlks (pl_inode); + } + } + pthread_mutex_unlock (&pl_inode->mutex); - gf_proc_dump_build_key(key, - "xlator.feature.locks.inode", - "%ld.%s",inode->ino, "mandatory"); - gf_proc_dump_write(key, "%d", pl_inode->mandatory); +out: + GF_FREE (pathname); + + if (ret && inode) { + if (!section_added) + gf_proc_dump_add_section ("xlator.features.locks.%s." + "inode", this->name); + gf_proc_dump_write ("Unable to print lock state", "(Lock " + "acquisition failure) %s", + uuid_utoa (inode->gfid)); + } + return ret; } +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + if (!this) + return ret; -/* - * pl_dump_inode - inode dump function for posix locks - * - */ -int -pl_dump_inode (xlator_t *this) + ret = xlator_mem_acct_init (this, gf_locks_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + + +pl_ctx_t* +pl_ctx_get (client_t *client, xlator_t *xlator) +{ + void *tmp = NULL; + pl_ctx_t *ctx = NULL; + + client_ctx_get (client, xlator, &tmp); + + ctx = tmp; + + if (ctx != NULL) + goto out; + + ctx = GF_CALLOC (1, sizeof (pl_ctx_t), gf_locks_mt_posix_lock_t); + + if (ctx == NULL) + goto out; + + ctx->ltable = pl_lock_table_new(); + + if (ctx->ltable == NULL) { + GF_FREE (ctx); + ctx = NULL; + goto out; + } + + LOCK_INIT (&ctx->ltable_lock); + + if (client_ctx_set (client, xlator, ctx) != 0) { + LOCK_DESTROY (&ctx->ltable_lock); + GF_FREE (ctx->ltable); + GF_FREE (ctx); + ctx = NULL; + } +out: + return ctx; +} + +static void +ltable_delete_locks (struct _lock_table *ltable) { + struct _locker *locker = NULL; + struct _locker *tmp = NULL; + + list_for_each_entry_safe (locker, tmp, <able->inodelk_lockers, lockers) { + if (locker->fd) + pl_del_locker (ltable, locker->volume, &locker->loc, + locker->fd, &locker->owner, + GF_FOP_INODELK); + GF_FREE (locker->volume); + GF_FREE (locker); + } - assert(this); - - if (this->itable) { - inode_table_dump(this->itable, - "xlator.features.locks.inode_table", - pl_dump_inode_priv); + list_for_each_entry_safe (locker, tmp, <able->entrylk_lockers, lockers) { + if (locker->fd) + pl_del_locker (ltable, locker->volume, &locker->loc, + locker->fd, &locker->owner, + GF_FOP_ENTRYLK); + GF_FREE (locker->volume); + GF_FREE (locker); } + GF_FREE (ltable); +} - return 0; + +static int32_t +destroy_cbk (xlator_t *this, client_t *client) +{ + void *tmp = NULL; + pl_ctx_t *locks_ctx = NULL; + + client_ctx_del (client, this, &tmp); + + if (tmp == NULL) + return 0 +; + locks_ctx = tmp; + if (locks_ctx->ltable) + ltable_delete_locks (locks_ctx->ltable); + + LOCK_DESTROY (&locks_ctx->ltable_lock); + GF_FREE (locks_ctx); + + return 0; } +static int32_t +disconnect_cbk (xlator_t *this, client_t *client) +{ + int32_t ret = 0; + pl_ctx_t *locks_ctx = NULL; + struct _lock_table *ltable = NULL; + + locks_ctx = pl_ctx_get (client, this); + if (locks_ctx == NULL) { + gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto out; + } + + LOCK (&locks_ctx->ltable_lock); + { + if (locks_ctx->ltable) { + ltable = locks_ctx->ltable; + locks_ctx->ltable = pl_lock_table_new (); + } + } + UNLOCK (&locks_ctx->ltable_lock); + + if (ltable) + ltable_delete_locks (ltable); + +out: + return ret; +} + int init (xlator_t *this) { - posix_locks_private_t *priv = NULL; - xlator_list_t *trav = NULL; - data_t *mandatory = NULL; - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_CRITICAL, - "FATAL: posix-locks should have exactly one child"); - return -1; - } + posix_locks_private_t *priv = NULL; + xlator_list_t *trav = NULL; + data_t *mandatory = NULL; + data_t *trace = NULL; + int ret = -1; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_CRITICAL, + "FATAL: posix-locks should have exactly one child"); + goto out; + } - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Volume is dangling. Please check the volume file."); - } + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "Volume is dangling. Please check the volume file."); + } - trav = this->children; - while (trav->xlator->children) - trav = trav->xlator->children; + trav = this->children; + while (trav->xlator->children) + trav = trav->xlator->children; - if (strncmp ("storage/", trav->xlator->type, 8)) { - gf_log (this->name, GF_LOG_CRITICAL, - "'locks' translator is not loaded over a storage " + if (strncmp ("storage/", trav->xlator->type, 8)) { + gf_log (this->name, GF_LOG_CRITICAL, + "'locks' translator is not loaded over a storage " "translator"); - return -1; - } + goto out; + } - priv = CALLOC (1, sizeof (*priv)); + priv = GF_CALLOC (1, sizeof (*priv), + gf_locks_mt_posix_locks_private_t); + + mandatory = dict_get (this->options, "mandatory-locks"); + if (mandatory) + gf_log (this->name, GF_LOG_WARNING, + "mandatory locks not supported in this minor release."); + + trace = dict_get (this->options, "trace"); + if (trace) { + if (gf_string2boolean (trace->data, + &priv->trace) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'trace' takes on only boolean values."); + goto out; + } + } - mandatory = dict_get (this->options, "mandatory-locks"); - if (mandatory) { - if (gf_string2boolean (mandatory->data, - &priv->mandatory) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "'mandatory-locks' takes on only boolean " - "values."); - return -1; - } - } + this->local_pool = mem_pool_new (pl_local_t, 32); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } - this->private = priv; - return 0; + this->private = priv; + ret = 0; + +out: + if (ret) { + GF_FREE (priv); + } + return ret; } int fini (xlator_t *this) { - posix_locks_private_t *priv = NULL; + posix_locks_private_t *priv = NULL; - priv = this->private; - free (priv); + priv = this->private; + if (!priv) + return 0; + this->private = NULL; + GF_FREE (priv->brickname); + GF_FREE (priv); - return 0; + return 0; } int -pl_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *flock); +pl_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, + dict_t *xdata); int -pl_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct flock *flock); +pl_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, + dict_t *xdata); int -pl_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type); +pl_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata); int -pl_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type); +pl_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata); struct xlator_fops fops = { - .create = pl_create, - .truncate = pl_truncate, - .ftruncate = pl_ftruncate, - .open = pl_open, - .readv = pl_readv, - .writev = pl_writev, - .lk = pl_lk, - .inodelk = pl_inodelk, - .finodelk = pl_finodelk, - .entrylk = pl_entrylk, - .fentrylk = pl_fentrylk, - .flush = pl_flush, -}; - - -struct xlator_mops mops = { + .lookup = pl_lookup, + .create = pl_create, + .truncate = pl_truncate, + .ftruncate = pl_ftruncate, + .open = pl_open, + .readv = pl_readv, + .writev = pl_writev, + .lk = pl_lk, + .inodelk = pl_inodelk, + .finodelk = pl_finodelk, + .entrylk = pl_entrylk, + .fentrylk = pl_fentrylk, + .flush = pl_flush, + .opendir = pl_opendir, + .readdirp = pl_readdirp, + .getxattr = pl_getxattr, + .fgetxattr = pl_fgetxattr, + .fsetxattr = pl_fsetxattr, }; struct xlator_dumpops dumpops = { - .inode = pl_dump_inode, + .inodectx = pl_dump_inode_priv, }; struct xlator_cbks cbks = { - .forget = pl_forget, + .forget = pl_forget, + .release = pl_release, + .releasedir = pl_releasedir, + .client_destroy = destroy_cbk, + .client_disconnect = disconnect_cbk, }; struct volume_options options[] = { - { .key = { "mandatory-locks", "mandatory" }, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {NULL} }, + { .key = { "mandatory-locks", "mandatory" }, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = { "trace" }, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, }; diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c new file mode 100644 index 000000000..11abd26d8 --- /dev/null +++ b/xlators/features/locks/src/reservelk.c @@ -0,0 +1,443 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +void +__delete_reserve_lock (posix_lock_t *lock) +{ + list_del (&lock->list); +} + +void +__destroy_reserve_lock (posix_lock_t *lock) +{ + GF_FREE (lock); +} + +/* Return true if the two reservelks have exactly same lock boundaries */ +int +reservelks_equal (posix_lock_t *l1, posix_lock_t *l2) +{ + if ((l1->fl_start == l2->fl_start) && + (l1->fl_end == l2->fl_end)) + return 1; + + return 0; +} + +/* Determine if lock is grantable or not */ +static posix_lock_t * +__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + xlator_t *this = NULL; + posix_lock_t *l = NULL; + posix_lock_t *ret_lock = NULL; + + this = THIS; + + if (list_empty (&pl_inode->reservelk_list)) { + gf_log (this->name, GF_LOG_TRACE, + "No reservelks in list"); + goto out; + } + list_for_each_entry (l, &pl_inode->reservelk_list, list){ + if (reservelks_equal (lock, l)) { + ret_lock = l; + break; + } + } +out: + return ret_lock; +} + +static inline int +__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2) +{ + return (is_same_lkowner (&l1->owner, &l2->owner)); + +} + +static posix_lock_t * +__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *l = NULL; + + if (list_empty (&pl_inode->reservelk_list)) { + gf_log ("posix-locks", GF_LOG_TRACE, + "reservelk list empty"); + return NULL; + } + + list_for_each_entry (l, &pl_inode->reservelk_list, list) { + if (reservelks_equal (l, lock)) { + gf_log ("posix-locks", GF_LOG_TRACE, + "equal reservelk found"); + break; + } + } + + return l; +} + +static int +__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock) +{ + posix_lock_t *conf = NULL; + int ret = 0; + + conf = __matching_reservelk (pl_inode, lock); + if (conf) { + gf_log (this->name, GF_LOG_TRACE, + "Matching reservelk found"); + if (__same_owner_reservelk (lock, conf)) { + list_del_init (&conf->list); + gf_log (this->name, GF_LOG_TRACE, + "Removing the matching reservelk for setlk to progress"); + GF_FREE (conf); + ret = 0; + } else { + gf_log (this->name, GF_LOG_TRACE, + "Conflicting reservelk found"); + ret = 1; + } + + } + return ret; + +} + +int +pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *lock, int can_block) +{ + int ret = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (__reservelk_conflict (this, pl_inode, lock)) { + gf_log (this->name, GF_LOG_TRACE, + "Found conflicting reservelk. Blocking until reservelk is unlocked."); + lock->blocked = can_block; + list_add_tail (&lock->list, &pl_inode->blocked_calls); + ret = -1; + goto unlock; + } + + gf_log (this->name, GF_LOG_TRACE, + "no conflicting reservelk found. Call continuing"); + ret = 0; + + } +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + return ret; + +} + + +/* Determines if lock can be granted and adds the lock. If the lock + * is blocking, adds it to the blocked_reservelks. + */ +static int +__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + posix_lock_t *conf = NULL; + int ret = -EINVAL; + + conf = __reservelk_grantable (pl_inode, lock); + if (conf){ + ret = -EAGAIN; + if (can_block == 0) + goto out; + + list_add_tail (&lock->list, &pl_inode->blocked_reservelks); + + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + + + goto out; + } + + list_add (&lock->list, &pl_inode->reservelk_list); + + ret = 0; + +out: + return ret; +} + +static posix_lock_t * +find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode) +{ + posix_lock_t *l = NULL; + list_for_each_entry (l, &pl_inode->reservelk_list, list) { + if (reservelks_equal (l, lock)) + return l; + } + return NULL; +} + +/* Set F_UNLCK removes a lock which has the exact same lock boundaries + * as the UNLCK lock specifies. If such a lock is not found, returns invalid + */ +static posix_lock_t * +__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode) +{ + + posix_lock_t *conf = NULL; + + conf = find_matching_reservelk (lock, pl_inode); + if (!conf) { + gf_log (this->name, GF_LOG_DEBUG, + " Matching lock not found for unlock"); + goto out; + } + __delete_reserve_lock (conf); + gf_log (this->name, GF_LOG_DEBUG, + " Matching lock found for unlock"); + +out: + return conf; + + +} + +static void +__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) +{ + int bl_ret = 0; + posix_lock_t *bl = NULL; + posix_lock_t *tmp = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + + list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + + list_del_init (&bl->list); + + bl_ret = __lock_reservelk (this, pl_inode, bl, 1); + + if (bl_ret == 0) { + list_add (&bl->list, granted); + } + } + return; +} + +/* Grant all reservelks blocked on lock(s) */ +void +grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode) +{ + struct list_head granted; + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; + + INIT_LIST_HEAD (&granted); + + if (list_empty (&pl_inode->blocked_reservelks)) { + gf_log (this->name, GF_LOG_TRACE, + "No blocked locks to be granted"); + return; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_reserve_locks (this, pl_inode, &granted); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, list) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock, + NULL); + } + +} + +static void +__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) +{ + int bl_ret = 0; + posix_lock_t *bl = NULL; + posix_lock_t *tmp = NULL; + + struct list_head blocked_list; + + INIT_LIST_HEAD (&blocked_list); + list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + + list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + + list_del_init (&bl->list); + + bl_ret = pl_verify_reservelk (this, pl_inode, bl, bl->blocked); + + if (bl_ret == 0) { + list_add_tail (&bl->list, granted); + } + } + return; +} + +void +grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode) +{ + struct list_head granted; + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; + fd_t *fd = NULL; + + int can_block = 0; + int32_t cmd = 0; + int ret = 0; + + if (list_empty (&pl_inode->blocked_calls)) { + gf_log (this->name, GF_LOG_TRACE, + "No blocked lock calls to be granted"); + return; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_lock_calls (this, pl_inode, &granted); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, list) { + fd = fd_from_fdnum (lock); + + if (lock->blocked) { + can_block = 1; + cmd = F_SETLKW; + } + else + cmd = F_SETLK; + + lock->blocked = 0; + ret = pl_setlk (this, pl_inode, lock, can_block); + if (ret == -1) { + if (can_block) { + pl_trace_block (this, lock->frame, fd, NULL, + cmd, &lock->user_flock, NULL); + continue; + } else { + gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); + pl_trace_out (this, lock->frame, fd, NULL, cmd, + &lock->user_flock, -1, EAGAIN, NULL); + pl_update_refkeeper (this, fd->inode); + STACK_UNWIND_STRICT (lk, lock->frame, -1, + EAGAIN, &lock->user_flock, + NULL); + __destroy_lock (lock); + } + } + + } + +} + + +int +pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *retlock = NULL; + int ret = -1; + + pthread_mutex_lock (&pl_inode->mutex); + { + retlock = __reserve_unlock_lock (this, lock, pl_inode); + if (!retlock) { + gf_log (this->name, GF_LOG_DEBUG, + "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; + } + + gf_log (this->name, GF_LOG_TRACE, + "Reservelk Unlock successful"); + __destroy_reserve_lock (retlock); + ret = 0; + } +out: + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_reserve_locks (this, pl_inode); + grant_blocked_lock_calls (this, pl_inode); + + return ret; + +} + +int +pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + int ret = -EINVAL; + + pthread_mutex_lock (&pl_inode->mutex); + { + + ret = __lock_reservelk (this, pl_inode, lock, can_block); + if (ret < 0) + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + else + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->fl_start, + lock->fl_end); + + } + pthread_mutex_unlock (&pl_inode->mutex); + return ret; +} diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c index 06e77d56b..d2cca32de 100644 --- a/xlators/features/locks/tests/unit-test.c +++ b/xlators/features/locks/tests/unit-test.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" |
