diff options
Diffstat (limited to 'xlators/features/locks/src')
-rw-r--r-- | xlators/features/locks/src/Makefile.am | 20 | ||||
-rw-r--r-- | xlators/features/locks/src/common.c | 561 | ||||
-rw-r--r-- | xlators/features/locks/src/common.h | 59 | ||||
-rw-r--r-- | xlators/features/locks/src/internal.c | 762 | ||||
-rw-r--r-- | xlators/features/locks/src/locks.h | 111 | ||||
-rw-r--r-- | xlators/features/locks/src/posix.c | 834 |
6 files changed, 2347 insertions, 0 deletions
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am new file mode 100644 index 00000000000..ec4a953eb91 --- /dev/null +++ b/xlators/features/locks/src/Makefile.am @@ -0,0 +1,20 @@ +xlator_LTLIBRARIES = locks.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +locks_la_LDFLAGS = -module -avoidversion + +locks_la_SOURCES = common.c posix.c internal.c +locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = locks.h common.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles + +CLEANFILES = + +uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/posix-locks.so + +install-data-hook: + ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
\ No newline at end of file diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c new file mode 100644 index 00000000000..9ac1250cc57 --- /dev/null +++ b/xlators/features/locks/src/common.c @@ -0,0 +1,561 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" + +#include "locks.h" + + +int +pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom); +static void +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom); + + +pl_inode_t * +pl_inode_get (xlator_t *this, inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + mode_t st_mode = 0; + uint64_t tmp_pl_inode = 0; + int ret = 0; + + LOCK (&inode->lock); + { + ret = inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret == 0) { + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + goto out; + } + + pl_inode = CALLOC (1, sizeof (*pl_inode)); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + st_mode = inode->st_mode; + if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP)) + pl_inode->mandatory = 1; + + + pthread_mutex_init (&pl_inode->mutex, NULL); + + INIT_LIST_HEAD (&pl_inode->dir_list); + INIT_LIST_HEAD (&pl_inode->ext_list); + INIT_LIST_HEAD (&pl_inode->int_list); + INIT_LIST_HEAD (&pl_inode->rw_list); + + ret = inode_ctx_put (inode, this, (uint64_t)(long)pl_inode); + } +out: + UNLOCK (&inode->lock); + return pl_inode; +} + + +/* Create a new posix_lock_t */ +posix_lock_t * +new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid) +{ + posix_lock_t *lock = NULL; + + lock = CALLOC (1, sizeof (posix_lock_t)); + if (!lock) { + return NULL; + } + + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; + + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->transport = transport; + lock->client_pid = client_pid; + + INIT_LIST_HEAD (&lock->list); + + return lock; +} + + +/* Delete a lock from the inode's lock list */ +void +__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + list_del_init (&lock->list); +} + + +/* Destroy a posix_lock */ +void +__destroy_lock (posix_lock_t *lock) +{ + free (lock); +} + + +/* Convert a posix_lock to a struct flock */ +void +posix_lock_to_flock (posix_lock_t *lock, struct flock *flock) +{ + flock->l_pid = lock->client_pid; + flock->l_type = lock->fl_type; + flock->l_start = lock->fl_start; + + if (lock->fl_end == 0) + flock->l_len = LLONG_MAX; + else + flock->l_len = lock->fl_end - lock->fl_start + 1; +} + + +/* Insert the lock into the inode's lock list */ +void +pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +{ + list_add_tail (&lock->list, DOMAIN_HEAD (pl_inode, dom)); + + return; +} + + +/* Return true if the locks overlap, false otherwise */ +int +locks_overlap (posix_lock_t *l1, posix_lock_t *l2) +{ + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ + + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); +} + + +/* Return true if the locks have the same owner */ +int +same_owner (posix_lock_t *l1, posix_lock_t *l2) +{ + return ((l1->client_pid == l2->client_pid) && + (l1->transport == l2->transport)); +} + + +/* Delete all F_UNLCK locks */ +void +__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + + list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->fl_type == F_UNLCK) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } +} + + +/* Add two locks */ +static posix_lock_t * +add_locks (posix_lock_t *l1, posix_lock_t *l2) +{ + posix_lock_t *sum = NULL; + + sum = CALLOC (1, sizeof (posix_lock_t)); + if (!sum) + return NULL; + + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); + + return sum; +} + +/* Subtract two locks */ +struct _values { + posix_lock_t *locks[3]; +}; + +/* {big} must always be contained inside {small} */ +static struct _values +subtract_locks (posix_lock_t *big, posix_lock_t *small) +{ + struct _values v = { .locks = {0, 0, 0} }; + + if ((big->fl_start == small->fl_start) && + (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_type = small->fl_type; + } + else if ((small->fl_start > big->fl_start) && + (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + v.locks[2] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[2]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + memcpy (v.locks[2], big, sizeof (posix_lock_t)); + v.locks[2]->fl_start = small->fl_end + 1; + } + /* one edge coincides with big */ + else if (small->fl_start == big->fl_start) { + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_start = small->fl_end + 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + } + else if (small->fl_end == big->fl_end) { + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + } + else { + gf_log ("posix-locks", GF_LOG_DEBUG, + "unexpected case in subtract_locks"); + } + + return v; +} + +/* + Start searching from {begin}, and return the first lock that + conflicts, NULL if no conflict + If {begin} is NULL, then start from the beginning of the list +*/ +static posix_lock_t * +first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + + list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->blocked) + continue; + + if (locks_overlap (l, lock)) + return l; + } + + return NULL; +} + + + +/* Return true if lock is grantable */ +int +pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) { + if (!l->blocked && locks_overlap (lock, l)) { + if (((l->fl_type == F_WRLCK) + || (lock->fl_type == F_WRLCK)) + && (lock->fl_type != F_UNLCK) + && !same_owner (l, lock)) { + ret = 0; + break; + } + } + } + return ret; +} + + +extern void do_blocked_rw (pl_inode_t *); + + +static void +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom) +{ + posix_lock_t *conf = NULL; + posix_lock_t *t = NULL; + posix_lock_t *sum = NULL; + int i = 0; + struct _values v = { .locks = {0, 0, 0} }; + + list_for_each_entry_safe (conf, t, DOMAIN_HEAD (pl_inode, dom), list) { + if (!locks_overlap (conf, lock)) + continue; + + if (same_owner (conf, lock)) { + if (conf->fl_type == lock->fl_type) { + sum = add_locks (lock, conf); + + sum->fl_type = lock->fl_type; + sum->transport = lock->transport; + sum->client_pid = lock->client_pid; + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __destroy_lock (lock); + __insert_and_merge (pl_inode, sum, dom); + + return; + } else { + sum = add_locks (lock, conf); + + sum->fl_type = conf->fl_type; + sum->transport = conf->transport; + sum->client_pid = conf->client_pid; + + v = subtract_locks (sum, lock); + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __delete_lock (pl_inode, lock); + __destroy_lock (lock); + + __destroy_lock (sum); + + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; + + if (v.locks[i]->fl_type == F_UNLCK) { + __destroy_lock (v.locks[i]); + continue; + } + __insert_and_merge (pl_inode, + v.locks[i], dom); + } + + __delete_unlck_locks (pl_inode, dom); + return; + } + } + + if (lock->fl_type == F_UNLCK) { + continue; + } + + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + pl_insert_lock (pl_inode, lock, dom); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + pl_insert_lock (pl_inode, lock, dom); + } else { + __destroy_lock (lock); + } +} + + +void +__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, + gf_lk_domain_t dom, struct list_head *granted) +{ + struct list_head tmp_list; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + posix_lock_t *conf = NULL; + + INIT_LIST_HEAD (&tmp_list); + + list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->blocked) { + conf = first_overlap (pl_inode, l, dom); + if (conf) + continue; + + l->blocked = 0; + list_move_tail (&l->list, &tmp_list); + } + } + + list_for_each_entry_safe (l, tmp, &tmp_list, list) { + list_del_init (&l->list); + + if (pl_is_lock_grantable (pl_inode, l, dom)) { + conf = CALLOC (1, sizeof (*conf)); + + if (!conf) { + l->blocked = 1; + pl_insert_lock (pl_inode, l, dom); + continue; + } + + conf->frame = l->frame; + l->frame = NULL; + + posix_lock_to_flock (l, &conf->user_flock); + + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => Granted", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", + l->client_pid, + l->user_flock.l_start, + l->user_flock.l_len); + + __insert_and_merge (pl_inode, l, dom); + + list_add (&conf->list, granted); + } else { + l->blocked = 1; + pl_insert_lock (pl_inode, l, dom); + } + } +} + + +void +grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom) +{ + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + + INIT_LIST_HEAD (&granted_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_locks (this, pl_inode, dom, &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted_list, list) { + list_del_init (&lock->list); + + STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock); + + FREE (lock); + } + + return; +} + + +int +pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block, gf_lk_domain_t dom) +{ + int ret = 0; + + errno = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (pl_is_lock_grantable (pl_inode, lock, dom)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + __insert_and_merge (pl_inode, lock, dom); + } else if (can_block) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + lock->blocked = 1; + pl_insert_lock (pl_inode, lock, dom); + ret = -1; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + errno = EAGAIN; + ret = -1; + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_locks (this, pl_inode, dom); + + do_blocked_rw (pl_inode); + + return ret; +} + + +posix_lock_t * +pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +{ + posix_lock_t *conf = NULL; + + conf = first_overlap (pl_inode, lock, dom); + + if (conf == NULL) { + lock->fl_type = F_UNLCK; + return lock; + } + + return conf; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h new file mode 100644 index 00000000000..135f33011bf --- /dev/null +++ b/xlators/features/locks/src/common.h @@ -0,0 +1,59 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __COMMON_H__ +#define __COMMON_H__ + +posix_lock_t * +new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid); + +pl_inode_t * +pl_inode_get (xlator_t *this, inode_t *inode); + +posix_lock_t * +pl_getlk (pl_inode_t *inode, posix_lock_t *lock, gf_lk_domain_t domain); + +int +pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, + int can_block, gf_lk_domain_t domain); + +int +pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom); + +void +pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom); + +void +grant_blocked_locks (xlator_t *this, pl_inode_t *inode, gf_lk_domain_t domain); + +void +posix_lock_to_flock (posix_lock_t *lock, struct flock *flock); + +int +locks_overlap (posix_lock_t *l1, posix_lock_t *l2); + +int +same_owner (posix_lock_t *l1, posix_lock_t *l2); + +void __delete_lock (pl_inode_t *, posix_lock_t *); + +void __destroy_lock (posix_lock_t *); + +#endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/internal.c b/xlators/features/locks/src/internal.c new file mode 100644 index 00000000000..7f454a78e22 --- /dev/null +++ b/xlators/features/locks/src/internal.c @@ -0,0 +1,762 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + + + +static int +delete_locks_of_transport (pl_inode_t *pinode, transport_t *trans) +{ + posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; + + list_for_each_entry_safe (l, tmp, &pinode->dir_list, list) { + if (l->transport == trans) { + __delete_lock (pinode, tmp); + __destroy_lock (tmp); + } + } + + return 0; +} + + +static posix_lock_t * +__find_exact_matching_lock (pl_inode_t *pinode, posix_lock_t *lock) +{ + posix_lock_t *l = NULL; + posix_lock_t *match = NULL; + + list_for_each_entry (l, DOMAIN_HEAD (pinode, GF_LOCK_INTERNAL), list) { + if (same_owner (l, lock) + && (l->fl_start == lock->fl_start) + && (l->fl_end == lock->fl_end)) { + match = l; + break; + } + } + + return match; +} + +/** + * pl_inodelk: + * + * This fop provides fcntl-style locking on files for internal + * purposes. Locks held through this fop reside in a domain different + * from those held by applications. This fop is for the use of AFR. + */ + + +static int +pl_inodelk_common (call_frame_t *frame, xlator_t *this, + inode_t *inode, int32_t cmd, struct flock *flock) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int can_block = 0; + + posix_locks_private_t * priv = NULL; + transport_t * transport = NULL; + pid_t client_pid = -1; + pl_inode_t * pinode = NULL; + + posix_lock_t * reqlock = NULL; + posix_lock_t * matchlock = NULL; /* steady, fire! */ + + VALIDATE_OR_GOTO (frame, unwind); + VALIDATE_OR_GOTO (inode, unwind); + VALIDATE_OR_GOTO (flock, unwind); + + if ((flock->l_start < 0) || (flock->l_len < 0)) { + op_errno = EINVAL; + goto unwind; + } + + transport = frame->root->trans; + client_pid = frame->root->pid; + + priv = (posix_locks_private_t *) this->private; + + VALIDATE_OR_GOTO (priv, unwind); + + pinode = pl_inode_get (this, inode); + if (!pinode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + goto unwind; + } + + if (client_pid == 0) { + /* + special case: this means release all locks + from this transport + */ + + gf_log (this->name, GF_LOG_DEBUG, + "releasing all locks from transport %p", transport); + + delete_locks_of_transport (pinode, transport); + goto unwind; + } + + reqlock = new_posix_lock (flock, transport, client_pid); + if (!reqlock) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + pthread_mutex_lock (&pinode->mutex); + { + switch (cmd) { + case F_SETLKW: + can_block = 1; + reqlock->frame = frame; + reqlock->this = this; + + /* fall through */ + + case F_SETLK: + memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); + + switch (flock->l_type) { + + case F_WRLCK: + if (!pl_is_lock_grantable (pinode, reqlock, GF_LOCK_INTERNAL)) { + if (can_block) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => blocked", + reqlock->fl_type == F_UNLCK ? "unlock" : "lock", + reqlock->client_pid, + reqlock->user_flock.l_start, + reqlock->user_flock.l_len); + pl_insert_lock (pinode, reqlock, GF_LOCK_INTERNAL); + + goto unlock; + } + + __destroy_lock (reqlock); + + + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => NOK", + reqlock->fl_type == F_UNLCK ? "unlock" : "lock", + reqlock->client_pid, reqlock->user_flock.l_start, + reqlock->user_flock.l_len); + op_errno = EAGAIN; + + goto unlock; + } + + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => OK", + reqlock->fl_type == F_UNLCK ? "unlock" : "lock", + reqlock->client_pid, + reqlock->user_flock.l_start, + reqlock->user_flock.l_len); + pl_insert_lock (pinode, reqlock, GF_LOCK_INTERNAL); + + break; + + case F_UNLCK: + matchlock = __find_exact_matching_lock (pinode, reqlock); + + __destroy_lock (reqlock); + if (!matchlock) { + op_errno = EINVAL; + goto unlock; + } + + __delete_lock (pinode, matchlock); + __destroy_lock (matchlock); + + break; + + default: + op_errno = ENOTSUP; + gf_log (this->name, GF_LOG_ERROR, + "lock type %d not supported for [F]INODELK", + flock->l_type); + goto unlock; + } + + + break; + + default: + op_errno = ENOTSUP; + gf_log (this->name, GF_LOG_ERROR, + "lock command F_GETLK not supported for [F]INODELK (cmd=%d)", + cmd); + goto unlock; + } + + op_ret = 0; + + unlock: + if (pinode) + pthread_mutex_unlock (&pinode->mutex); + } + +unwind: + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int +pl_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *flock) +{ + return pl_inodelk_common (frame, this, loc->inode, cmd, flock); +} + + +int +pl_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *flock) +{ + return pl_inodelk_common (frame, this, fd->inode, cmd, flock); +} + + +/** + * types_conflict - do two types of lock conflict? + * @t1: type + * @t2: type + * + * two read locks do not conflict + * any other case conflicts + */ + +static int +types_conflict (entrylk_type t1, entrylk_type t2) +{ + return !((t1 == ENTRYLK_RDLCK) && (t2 == ENTRYLK_RDLCK)); +} + +/** + * all_names - does a basename represent all names? + * @basename: name to check + */ + +#define all_names(basename) ((basename == NULL) ? 1 : 0) + +/** + * names_conflict - do two names conflict? + * @n1: name + * @n2: name + */ + +static int +names_conflict (const char *n1, const char *n2) +{ + return all_names (n1) || all_names (n2) || !strcmp (n1, n2); +} + + +static int +names_equal (const char *n1, const char *n2) +{ + return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); +} + +/** + * lock_grantable - is this lock grantable? + * @inode: inode in which to look + * @basename: name we're trying to lock + * @type: type of lock + */ + +static pl_entry_lock_t * +__lock_grantable (pl_inode_t *pinode, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + + if (list_empty (&pinode->dir_list)) + return NULL; + + list_for_each_entry (lock, &pinode->dir_list, inode_list) { + if (names_conflict (lock->basename, basename) && + types_conflict (lock->type, type)) + return lock; + } + + return NULL; +} + +/** + * find_most_matching_lock - find the lock struct which most matches in order of: + * lock on the exact basename || + * an all_names lock + * + * + * @inode: inode in which to look + * @basename: name to search for + */ + +static pl_entry_lock_t * +__find_most_matching_lock (pl_inode_t *pinode, const char *basename) +{ + pl_entry_lock_t *lock; + pl_entry_lock_t *all = NULL; + pl_entry_lock_t *exact = NULL; + + if (list_empty (&pinode->dir_list)) + return NULL; + + list_for_each_entry (lock, &pinode->dir_list, inode_list) { + if (all_names (lock->basename)) + all = lock; + else if (names_equal (lock->basename, basename)) + exact = lock; + } + + return (exact ? exact : all); +} + + +/** + * insert_new_lock - insert a new dir lock into the inode with the given parameters + * @pinode: inode to insert into + * @basename: basename for the lock + * @type: type of the lock + */ + +static pl_entry_lock_t * +new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, + transport_t *trans) +{ + pl_entry_lock_t *newlock = NULL; + + newlock = CALLOC (sizeof (pl_entry_lock_t), 1); + if (!newlock) { + goto out; + } + + newlock->basename = basename ? strdup (basename) : NULL; + newlock->type = type; + newlock->trans = trans; + + if (type == ENTRYLK_RDLCK) + newlock->read_count = 1; + + INIT_LIST_HEAD (&newlock->inode_list); + INIT_LIST_HEAD (&newlock->blocked_locks); + +out: + return newlock; +} + +/** + * lock_name - lock a name in a directory + * @inode: inode for the directory in which to lock + * @basename: name of the entry to lock + * if null, lock the entire directory + * + * the entire directory being locked is represented as: a single + * pl_entry_lock_t present in the entrylk_locks list with its + * basename = NULL + */ + +int +__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, + call_frame_t *frame, xlator_t *this, int nonblock) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *conf = NULL; + + transport_t *trans = NULL; + + int ret = -EINVAL; + + trans = frame->root->trans; + + conf = __lock_grantable (pinode, basename, type); + if (conf) { + ret = -EAGAIN; + if (nonblock) + goto out; + + lock = new_entrylk_lock (pinode, basename, type, trans); + + if (!lock) { + ret = -ENOMEM; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "blocking lock: {pinode=%p, basename=%s}", + pinode, basename); + + lock->frame = frame; + lock->this = this; + lock->blocked = 1; + + list_add (&lock->blocked_locks, &conf->blocked_locks); + + + goto out; + } + + switch (type) { + case ENTRYLK_RDLCK: + lock = __find_most_matching_lock (pinode, basename); + + if (lock && names_equal (lock->basename, basename)) { + lock->read_count++; + + FREE (lock->basename); + FREE (lock); + + lock = NULL; + } else { + lock = new_entrylk_lock (pinode, basename, type, trans); + + if (!lock) { + ret = -ENOMEM; + goto out; + } + + list_add (&lock->inode_list, &pinode->dir_list); + } + break; + + case ENTRYLK_WRLCK: + lock = new_entrylk_lock (pinode, basename, type, trans); + + if (!lock) { + ret = -ENOMEM; + goto out; + } + + list_add (&lock->inode_list, &pinode->dir_list); + break; + } + + ret = 0; +out: + return ret; +} + + +/** + * unlock_name - unlock a name in a directory + * @inode: inode for the directory to unlock in + * @basename: name of the entry to unlock + * if null, unlock the entire directory + */ + +pl_entry_lock_t * +__unlock_name (pl_inode_t *pinode, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *ret_lock = NULL; + + lock = __find_most_matching_lock (pinode, basename); + + if (!lock) { + gf_log ("locks", GF_LOG_DEBUG, + "unlock on %s (type=%s) attempted but no matching lock found", + basename, type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : + "ENTRYLK_WRLCK"); + goto out; + } + + if (names_equal (lock->basename, basename) + && lock->type == type) { + if (type == ENTRYLK_RDLCK) { + lock->read_count--; + } + if (type == ENTRYLK_WRLCK || lock->read_count == 0) { + list_del (&lock->inode_list); + ret_lock = lock; + } + } else { + gf_log ("locks", GF_LOG_ERROR, + "unlock for a non-existing lock!"); + goto out; + } + +out: + return ret_lock; +} + + +void +__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_entry_lock_t *lock, + struct list_head *granted) +{ + int bl_ret = 0; + pl_entry_lock_t *bl = NULL; + pl_entry_lock_t *tmp = NULL; + + list_for_each_entry_safe (bl, tmp, &lock->blocked_locks, + blocked_locks) { + list_del_init (&bl->blocked_locks); + + /* TODO: error checking */ + + gf_log ("locks", GF_LOG_DEBUG, + "trying to unblock: {pinode=%p, basename=%s}", + pl_inode, bl->basename); + + bl_ret = __lock_name (pl_inode, bl->basename, bl->type, + bl->frame, bl->this, 0); + + if (bl_ret == 0) { + list_add (&bl->blocked_locks, granted); + } else { + if (bl->basename) + FREE (bl->basename); + FREE (bl); + } + } + return; +} + + +void +grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_entry_lock_t *unlocked) +{ + struct list_head granted_list; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lock = NULL; + + INIT_LIST_HEAD (&granted_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_entry_locks (this, pl_inode, unlocked, + &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { + list_del_init (&lock->blocked_locks); + + STACK_UNWIND (lock->frame, 0, 0); + + FREE (lock->basename); + FREE (lock); + } + + FREE (unlocked->basename); + FREE (unlocked); + + return; +} + + +/** + * release_entry_locks_for_transport: release all entry locks from this + * transport for this loc_t + */ + +static int +release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode, + transport_t *trans) +{ + pl_entry_lock_t *lock; + pl_entry_lock_t *tmp; + struct list_head granted; + + INIT_LIST_HEAD (&granted); + + pthread_mutex_lock (&pinode->mutex); + { + if (list_empty (&pinode->dir_list)) { + goto unlock; + } + + list_for_each_entry_safe (lock, tmp, &pinode->dir_list, + inode_list) { + if (lock->trans != trans) + continue; + + list_del_init (&lock->inode_list); + __grant_blocked_entry_locks (this, pinode, lock, + &granted); + + FREE (lock->basename); + FREE (lock); + } + } +unlock: + pthread_mutex_unlock (&pinode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + list_del_init (&lock->blocked_locks); + + STACK_UNWIND (lock->frame, 0, 0); + + FREE (lock->basename); + FREE (lock); + } + + return 0; +} + + +/** + * pl_entrylk: + * + * Locking on names (directory entries) + */ + +int +pl_entrylk_common (call_frame_t *frame, xlator_t *this, + inode_t *inode, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + + transport_t * transport = NULL; + pid_t pid = -1; + + pl_inode_t * pinode = NULL; + int ret = -1; + pl_entry_lock_t *unlocked = NULL; + char unwind = 1; + + pinode = pl_inode_get (this, inode); + if (!pinode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + goto out; + } + + pid = frame->root->pid; + transport = frame->root->trans; + + if (pid == 0) { + /* + this is a special case that means release + all locks from this transport + */ + + gf_log (this->name, GF_LOG_DEBUG, + "releasing locks for transport %p", transport); + + release_entry_locks_for_transport (this, pinode, transport); + op_ret = 0; + + goto out; + } + + switch (cmd) { + case ENTRYLK_LOCK: + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, this, 0); + } + pthread_mutex_unlock (&pinode->mutex); + + if (ret < 0) { + if (ret == -EAGAIN) + unwind = 0; + op_errno = -ret; + goto out; + } + + break; + + case ENTRYLK_LOCK_NB: + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, this, 1); + } + pthread_mutex_unlock (&pinode->mutex); + + if (ret < 0) { + op_errno = -ret; + goto out; + } + + break; + + case ENTRYLK_UNLOCK: + pthread_mutex_lock (&pinode->mutex); + { + unlocked = __unlock_name (pinode, basename, type); + } + pthread_mutex_unlock (&pinode->mutex); + + if (unlocked) + grant_blocked_entry_locks (this, pinode, unlocked); + + break; + + default: + gf_log (this->name, GF_LOG_ERROR, + "unexpected case!"); + goto out; + } + + op_ret = 0; +out: + if (unwind) { + STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + + +int +pl_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + return pl_entrylk_common (frame, this, loc->inode, basename, cmd, type); +} + + +int +pl_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + return pl_entrylk_common (frame, this, fd->inode, basename, cmd, type); +} diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h new file mode 100644 index 00000000000..8ed7bb63f1c --- /dev/null +++ b/xlators/features/locks/src/locks.h @@ -0,0 +1,111 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __POSIX_LOCKS_H__ +#define __POSIX_LOCKS_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "compat-errno.h" +#include "transport.h" +#include "stack.h" +#include "call-stub.h" + +struct __pl_fd; + +struct __posix_lock { + struct list_head list; + + short fl_type; + off_t fl_start; + off_t fl_end; + + short blocked; /* waiting to acquire */ + struct flock user_flock; /* the flock supplied by the user */ + xlator_t *this; /* required for blocked locks */ + fd_t *fd; + + call_frame_t *frame; + + /* These two together serve to uniquely identify each process + across nodes */ + + transport_t *transport; /* to identify client node */ + pid_t client_pid; /* pid of client process */ +}; +typedef struct __posix_lock posix_lock_t; + +struct __pl_rw_req_t { + struct list_head list; + call_stub_t *stub; + posix_lock_t region; +}; +typedef struct __pl_rw_req_t pl_rw_req_t; + + +struct __entry_lock { + struct list_head inode_list; /* list_head back to pl_inode_t */ + struct list_head blocked_locks; /* locks blocked due to this lock */ + + call_frame_t *frame; + xlator_t *this; + int blocked; + + const char *basename; + entrylk_type type; + unsigned int read_count; /* number of read locks */ + transport_t *trans; +}; +typedef struct __entry_lock pl_entry_lock_t; + + +/* The "simulated" inode. This contains a list of all the locks associated + with this file */ + +struct __pl_inode { + pthread_mutex_t mutex; + + struct list_head dir_list; /* list of entry locks */ + struct list_head ext_list; /* list of fcntl locks */ + struct list_head int_list; /* list of internal locks */ + struct list_head rw_list; /* list of waiting r/w requests */ + int mandatory; /* if mandatory locking is enabled */ +}; +typedef struct __pl_inode pl_inode_t; + +#define DOMAIN_HEAD(pl_inode, dom) (dom == GF_LOCK_POSIX \ + ? &pl_inode->ext_list \ + : &pl_inode->int_list) + + +struct __pl_fd { + gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */ +}; +typedef struct __pl_fd pl_fd_t; + + +typedef struct { + gf_boolean_t mandatory; /* if mandatory locking is enabled */ +} posix_locks_private_t; + + +#endif /* __POSIX_LOCKS_H__ */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c new file mode 100644 index 00000000000..e2b336607c4 --- /dev/null +++ b/xlators/features/locks/src/posix.c @@ -0,0 +1,834 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" + +#include "locks.h" +#include "common.h" + +#ifndef LLONG_MAX +#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */ +#endif /* LLONG_MAX */ + +/* Forward declarations */ + + +void do_blocked_rw (pl_inode_t *); +static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t); + +struct _truncate_ops { + loc_t loc; + fd_t *fd; + off_t offset; + enum {TRUNCATE, FTRUNCATE} op; +}; + + +int +pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct _truncate_ops *local = NULL; + + local = frame->local; + + if (local->op == TRUNCATE) + loc_wipe (&local->loc); + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +static int +truncate_allowed (pl_inode_t *pl_inode, + transport_t *transport, pid_t client_pid, + off_t offset) +{ + posix_lock_t *l = NULL; + posix_lock_t region = {.list = {0, }, }; + int ret = 1; + + region.fl_start = offset; + region.fl_end = LLONG_MAX; + region.transport = transport; + region.client_pid = client_pid; + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (!l->blocked + && locks_overlap (®ion, l) + && !same_owner (®ion, l)) { + ret = 0; + break; + } + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + return ret; +} + + +static int +truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + posix_locks_private_t *priv = NULL; + struct _truncate_ops *local = NULL; + inode_t *inode = NULL; + pl_inode_t *pl_inode = NULL; + + + priv = this->private; + local = frame->local; + + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "got error (errno=%d, stderror=%s) from child", + op_errno, strerror (op_errno)); + goto unwind; + } + + if (local->op == TRUNCATE) + inode = local->loc.inode; + else + inode = local->fd->inode; + + pl_inode = pl_inode_get (this, inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "unable to get pl_inode from %p", inode); + op_errno = ENOMEM; + goto unwind; + } + + if (priv->mandatory + && pl_inode->mandatory + && !truncate_allowed (pl_inode, frame->root->trans, + frame->root->pid, local->offset)) { + op_errno = EAGAIN; + goto unwind; + } + + switch (local->op) { + case TRUNCATE: + STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->truncate, + &local->loc, local->offset); + break; + case FTRUNCATE: + STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->ftruncate, + local->fd, local->offset); + break; + } + + return 0; + +unwind: + if (local->op == TRUNCATE) + loc_wipe (&local->loc); + + STACK_UNWIND (frame, -1, ENOMEM, buf); + return 0; +} + + +int +pl_truncate (call_frame_t *frame, xlator_t *this, + loc_t *loc, off_t offset) +{ + struct _truncate_ops *local = NULL; + + local = CALLOC (1, sizeof (struct _truncate_ops)); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto unwind; + } + + local->op = TRUNCATE; + local->offset = offset; + loc_copy (&local->loc, loc); + + frame->local = local; + + STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->stat, loc); + + return 0; + +unwind: + STACK_UNWIND (frame, -1, ENOMEM, NULL); + + return 0; +} + + +int +pl_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset) +{ + struct _truncate_ops *local = NULL; + + local = CALLOC (1, sizeof (struct _truncate_ops)); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto unwind; + } + + local->op = FTRUNCATE; + local->offset = offset; + local->fd = fd; + + frame->local = local; + + STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd); + return 0; + +unwind: + STACK_UNWIND (frame, -1, ENOMEM, NULL); + + return 0; +} + + +static void +__delete_locks_of_owner (pl_inode_t *pl_inode, + transport_t *transport, pid_t pid) +{ + posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; + + /* TODO: what if it is a blocked lock with pending l->frame */ + + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if ((l->transport == transport) + && (l->client_pid == pid)) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } + + list_for_each_entry_safe (l, tmp, &pl_inode->int_list, list) { + if ((l->transport == transport) + && (l->client_pid == pid)) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } + + return; +} + + +int +pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + + +int +pl_flush (call_frame_t *frame, xlator_t *this, + fd_t *fd) +{ + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + + priv = this->private; + + pl_inode = pl_inode_get (this, fd->inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, "returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + __delete_locks_of_owner (pl_inode, frame->root->trans, + frame->root->pid); + } + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_locks (this, pl_inode, GF_LOCK_POSIX); + grant_blocked_locks (this, pl_inode, GF_LOCK_INTERNAL); + + do_blocked_rw (pl_inode); + + STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd); + return 0; +} + + +int +pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +} + + +int +pl_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, fd_t *fd) +{ + /* why isn't O_TRUNC being handled ? */ + STACK_WIND (frame, pl_open_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, + loc, flags & ~O_TRUNC, fd); + + return 0; +} + + +int +pl_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + fd_t *fd, inode_t *inode, struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + + return 0; +} + + +int +pl_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + STACK_WIND (frame, pl_create_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, + loc, flags, mode, fd); + return 0; +} + + +int +pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + + return 0; +} + +int +pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + + return 0; +} + + +void +do_blocked_rw (pl_inode_t *pl_inode) +{ + struct list_head wind_list; + pl_rw_req_t *rw = NULL; + pl_rw_req_t *tmp = NULL; + + INIT_LIST_HEAD (&wind_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) { + if (__rw_allowable (pl_inode, &rw->region, + rw->stub->fop)) { + list_del_init (&rw->list); + list_add_tail (&rw->list, &wind_list); + } + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (rw, tmp, &wind_list, list) { + list_del_init (&rw->list); + call_resume (rw->stub); + free (rw); + } + + return; +} + + +static int +__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region, + glusterfs_fop_t op) +{ + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (locks_overlap (l, region) && !same_owner (l, region)) { + if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK)) + continue; + ret = 0; + break; + } + } + + return ret; +} + + +int +pl_readv_cont (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset) +{ + STACK_WIND (frame, pl_readv_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, + fd, size, offset); + + return 0; +} + + +int +pl_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset) +{ + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = {.list = {0, }, }; + int op_ret = 0; + int op_errno = 0; + char allowable = 0; + + + priv = this->private; + + pl_inode = pl_inode_get (this, fd->inode); + + if (priv->mandatory && pl_inode->mandatory) { + region.fl_start = offset; + region.fl_end = offset + size - 1; + region.transport = frame->root->trans; + region.client_pid = frame->root->pid; + + pthread_mutex_lock (&pl_inode->mutex); + { + allowable = __rw_allowable (pl_inode, ®ion, + GF_FOP_READ); + if (allowable) + goto unlock; + + if (fd->flags & O_NONBLOCK) { + gf_log (this->name, GF_LOG_DEBUG, + "returning EWOULDBLOCK"); + op_errno = EWOULDBLOCK; + op_ret = -1; + goto unlock; + } + + rw = CALLOC (1, sizeof (*rw)); + if (!rw) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } + + rw->stub = fop_readv_stub (frame, pl_readv_cont, + fd, size, offset); + if (!rw->stub) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + free (rw); + goto unlock; + } + + rw->region = region; + + list_add_tail (&rw->list, &pl_inode->rw_list); + } + unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + goto unwind; + } + + + STACK_WIND (frame, pl_readv_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, + fd, size, offset); + return 0; + +unwind: + if (op_ret == -1) + STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); + + return 0; +} + + +int +pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int count, off_t offset) +{ + STACK_WIND (frame, pl_writev_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, + fd, vector, count, offset); + + return 0; +} + + +int +pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset) +{ + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = {.list = {0, }, }; + int op_ret = 0; + int op_errno = 0; + char allowable = 0; + + + priv = this->private; + + pl_inode = pl_inode_get (this, fd->inode); + if (priv->mandatory && pl_inode->mandatory) { + region.fl_start = offset; + region.fl_end = offset + iov_length (vector, count) - 1; + region.transport = frame->root->trans; + region.client_pid = frame->root->pid; + + pthread_mutex_lock (&pl_inode->mutex); + { + allowable = __rw_allowable (pl_inode, ®ion, + GF_FOP_WRITE); + if (allowable) + goto unlock; + + if (fd->flags & O_NONBLOCK) { + gf_log (this->name, GF_LOG_DEBUG, + "returning EWOULDBLOCK"); + op_errno = EWOULDBLOCK; + op_ret = -1; + goto unlock; + } + + rw = CALLOC (1, sizeof (*rw)); + if (!rw) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } + + rw->stub = fop_writev_stub (frame, pl_writev_cont, + fd, vector, count, offset); + if (!rw->stub) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + free (rw); + goto unlock; + } + + rw->region = region; + + list_add_tail (&rw->list, &pl_inode->rw_list); + } + unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + goto unwind; + } + + + STACK_WIND (frame, pl_writev_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, + fd, vector, count, offset); + return 0; + +unwind: + if (op_ret == -1) + STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); + + return 0; +} + + +int +pl_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *flock) +{ + transport_t *transport = NULL; + pid_t client_pid = 0; + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + int op_ret = 0; + int op_errno = 0; + int can_block = 0; + posix_lock_t *reqlock = NULL; + posix_lock_t *conf = NULL; + int ret = 0; + + transport = frame->root->trans; + client_pid = frame->root->pid; + priv = this->private; + + pl_inode = pl_inode_get (this, fd->inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + reqlock = new_posix_lock (flock, transport, client_pid); + if (!reqlock) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + switch (cmd) { + +#if F_GETLK != F_GETLK64 + case F_GETLK64: +#endif + case F_GETLK: + conf = pl_getlk (pl_inode, reqlock, GF_LOCK_POSIX); + posix_lock_to_flock (conf, flock); + __destroy_lock (reqlock); + + break; + +#if F_SETLKW != F_SETLKW64 + case F_SETLKW64: +#endif + case F_SETLKW: + can_block = 1; + reqlock->frame = frame; + reqlock->this = this; + reqlock->fd = fd; + + /* fall through */ + +#if F_SETLK != F_SETLK64 + case F_SETLK64: +#endif + case F_SETLK: + memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); + ret = pl_setlk (this, pl_inode, reqlock, + can_block, GF_LOCK_POSIX); + + if (ret == -1) { + if (can_block) + goto out; + + gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); + op_ret = -1; + op_errno = EAGAIN; + __destroy_lock (reqlock); + } + } + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, flock); +out: + return 0; +} + + +/* TODO: this function just logs, no action required?? */ +int +pl_forget (xlator_t *this, + inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + + pl_inode = pl_inode_get (this, inode); + + if (!list_empty (&pl_inode->rw_list)) { + gf_log (this->name, GF_LOG_CRITICAL, + "pending R/W requests found!"); + } + + if (!list_empty (&pl_inode->ext_list)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Pending fcntl locks found!"); + } + + if (!list_empty (&pl_inode->int_list)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Pending internal locks found!"); + } + + if (!list_empty (&pl_inode->dir_list)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Pending entry locks found!"); + } + + FREE (pl_inode); + + return 0; +} + + +int +init (xlator_t *this) +{ + posix_locks_private_t *priv = NULL; + xlator_list_t *trav = NULL; + data_t *mandatory = NULL; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: posix-locks should have exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + trav = this->children; + while (trav->xlator->children) + trav = trav->xlator->children; + + if (strncmp ("storage/", trav->xlator->type, 8)) { + gf_log (this->name, GF_LOG_ERROR, + "'posix-locks' not loaded over storage translator"); + return -1; + } + + priv = CALLOC (1, sizeof (*priv)); + + mandatory = dict_get (this->options, "mandatory-locks"); + if (mandatory) { + if (gf_string2boolean (mandatory->data, + &priv->mandatory) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'mandatory-locks' takes only boolean " + "options"); + return -1; + } + } + + this->private = priv; + return 0; +} + + +int +fini (xlator_t *this) +{ + posix_locks_private_t *priv = NULL; + + priv = this->private; + free (priv); + + return 0; +} + + +int +pl_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *flock); + +int +pl_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *flock); + +int +pl_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +int +pl_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +struct xlator_fops fops = { + .create = pl_create, + .truncate = pl_truncate, + .ftruncate = pl_ftruncate, + .open = pl_open, + .readv = pl_readv, + .writev = pl_writev, + .lk = pl_lk, + .inodelk = pl_inodelk, + .finodelk = pl_finodelk, + .entrylk = pl_entrylk, + .fentrylk = pl_fentrylk, + .flush = pl_flush, +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { + .forget = pl_forget, +}; + + +struct volume_options options[] = { + { .key = { "mandatory-locks", "mandatory" }, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, +}; |