diff options
Diffstat (limited to 'xlators/features/locks/src/common.c')
-rw-r--r-- | xlators/features/locks/src/common.c | 561 |
1 files changed, 561 insertions, 0 deletions
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c new file mode 100644 index 000000000..9ac1250cc --- /dev/null +++ b/xlators/features/locks/src/common.c @@ -0,0 +1,561 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" + +#include "locks.h" + + +int +pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom); +static void +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom); + + +pl_inode_t * +pl_inode_get (xlator_t *this, inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + mode_t st_mode = 0; + uint64_t tmp_pl_inode = 0; + int ret = 0; + + LOCK (&inode->lock); + { + ret = inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret == 0) { + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + goto out; + } + + pl_inode = CALLOC (1, sizeof (*pl_inode)); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + st_mode = inode->st_mode; + if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP)) + pl_inode->mandatory = 1; + + + pthread_mutex_init (&pl_inode->mutex, NULL); + + INIT_LIST_HEAD (&pl_inode->dir_list); + INIT_LIST_HEAD (&pl_inode->ext_list); + INIT_LIST_HEAD (&pl_inode->int_list); + INIT_LIST_HEAD (&pl_inode->rw_list); + + ret = inode_ctx_put (inode, this, (uint64_t)(long)pl_inode); + } +out: + UNLOCK (&inode->lock); + return pl_inode; +} + + +/* Create a new posix_lock_t */ +posix_lock_t * +new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid) +{ + posix_lock_t *lock = NULL; + + lock = CALLOC (1, sizeof (posix_lock_t)); + if (!lock) { + return NULL; + } + + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; + + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->transport = transport; + lock->client_pid = client_pid; + + INIT_LIST_HEAD (&lock->list); + + return lock; +} + + +/* Delete a lock from the inode's lock list */ +void +__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + list_del_init (&lock->list); +} + + +/* Destroy a posix_lock */ +void +__destroy_lock (posix_lock_t *lock) +{ + free (lock); +} + + +/* Convert a posix_lock to a struct flock */ +void +posix_lock_to_flock (posix_lock_t *lock, struct flock *flock) +{ + flock->l_pid = lock->client_pid; + flock->l_type = lock->fl_type; + flock->l_start = lock->fl_start; + + if (lock->fl_end == 0) + flock->l_len = LLONG_MAX; + else + flock->l_len = lock->fl_end - lock->fl_start + 1; +} + + +/* Insert the lock into the inode's lock list */ +void +pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +{ + list_add_tail (&lock->list, DOMAIN_HEAD (pl_inode, dom)); + + return; +} + + +/* Return true if the locks overlap, false otherwise */ +int +locks_overlap (posix_lock_t *l1, posix_lock_t *l2) +{ + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ + + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); +} + + +/* Return true if the locks have the same owner */ +int +same_owner (posix_lock_t *l1, posix_lock_t *l2) +{ + return ((l1->client_pid == l2->client_pid) && + (l1->transport == l2->transport)); +} + + +/* Delete all F_UNLCK locks */ +void +__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + + list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->fl_type == F_UNLCK) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } +} + + +/* Add two locks */ +static posix_lock_t * +add_locks (posix_lock_t *l1, posix_lock_t *l2) +{ + posix_lock_t *sum = NULL; + + sum = CALLOC (1, sizeof (posix_lock_t)); + if (!sum) + return NULL; + + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); + + return sum; +} + +/* Subtract two locks */ +struct _values { + posix_lock_t *locks[3]; +}; + +/* {big} must always be contained inside {small} */ +static struct _values +subtract_locks (posix_lock_t *big, posix_lock_t *small) +{ + struct _values v = { .locks = {0, 0, 0} }; + + if ((big->fl_start == small->fl_start) && + (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_type = small->fl_type; + } + else if ((small->fl_start > big->fl_start) && + (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + v.locks[2] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[2]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + memcpy (v.locks[2], big, sizeof (posix_lock_t)); + v.locks[2]->fl_start = small->fl_end + 1; + } + /* one edge coincides with big */ + else if (small->fl_start == big->fl_start) { + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_start = small->fl_end + 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + } + else if (small->fl_end == big->fl_end) { + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + } + else { + gf_log ("posix-locks", GF_LOG_DEBUG, + "unexpected case in subtract_locks"); + } + + return v; +} + +/* + Start searching from {begin}, and return the first lock that + conflicts, NULL if no conflict + If {begin} is NULL, then start from the beginning of the list +*/ +static posix_lock_t * +first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + + list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->blocked) + continue; + + if (locks_overlap (l, lock)) + return l; + } + + return NULL; +} + + + +/* Return true if lock is grantable */ +int +pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) { + if (!l->blocked && locks_overlap (lock, l)) { + if (((l->fl_type == F_WRLCK) + || (lock->fl_type == F_WRLCK)) + && (lock->fl_type != F_UNLCK) + && !same_owner (l, lock)) { + ret = 0; + break; + } + } + } + return ret; +} + + +extern void do_blocked_rw (pl_inode_t *); + + +static void +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom) +{ + posix_lock_t *conf = NULL; + posix_lock_t *t = NULL; + posix_lock_t *sum = NULL; + int i = 0; + struct _values v = { .locks = {0, 0, 0} }; + + list_for_each_entry_safe (conf, t, DOMAIN_HEAD (pl_inode, dom), list) { + if (!locks_overlap (conf, lock)) + continue; + + if (same_owner (conf, lock)) { + if (conf->fl_type == lock->fl_type) { + sum = add_locks (lock, conf); + + sum->fl_type = lock->fl_type; + sum->transport = lock->transport; + sum->client_pid = lock->client_pid; + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __destroy_lock (lock); + __insert_and_merge (pl_inode, sum, dom); + + return; + } else { + sum = add_locks (lock, conf); + + sum->fl_type = conf->fl_type; + sum->transport = conf->transport; + sum->client_pid = conf->client_pid; + + v = subtract_locks (sum, lock); + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __delete_lock (pl_inode, lock); + __destroy_lock (lock); + + __destroy_lock (sum); + + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; + + if (v.locks[i]->fl_type == F_UNLCK) { + __destroy_lock (v.locks[i]); + continue; + } + __insert_and_merge (pl_inode, + v.locks[i], dom); + } + + __delete_unlck_locks (pl_inode, dom); + return; + } + } + + if (lock->fl_type == F_UNLCK) { + continue; + } + + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + pl_insert_lock (pl_inode, lock, dom); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + pl_insert_lock (pl_inode, lock, dom); + } else { + __destroy_lock (lock); + } +} + + +void +__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, + gf_lk_domain_t dom, struct list_head *granted) +{ + struct list_head tmp_list; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + posix_lock_t *conf = NULL; + + INIT_LIST_HEAD (&tmp_list); + + list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->blocked) { + conf = first_overlap (pl_inode, l, dom); + if (conf) + continue; + + l->blocked = 0; + list_move_tail (&l->list, &tmp_list); + } + } + + list_for_each_entry_safe (l, tmp, &tmp_list, list) { + list_del_init (&l->list); + + if (pl_is_lock_grantable (pl_inode, l, dom)) { + conf = CALLOC (1, sizeof (*conf)); + + if (!conf) { + l->blocked = 1; + pl_insert_lock (pl_inode, l, dom); + continue; + } + + conf->frame = l->frame; + l->frame = NULL; + + posix_lock_to_flock (l, &conf->user_flock); + + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => Granted", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", + l->client_pid, + l->user_flock.l_start, + l->user_flock.l_len); + + __insert_and_merge (pl_inode, l, dom); + + list_add (&conf->list, granted); + } else { + l->blocked = 1; + pl_insert_lock (pl_inode, l, dom); + } + } +} + + +void +grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom) +{ + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + + INIT_LIST_HEAD (&granted_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_locks (this, pl_inode, dom, &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted_list, list) { + list_del_init (&lock->list); + + STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock); + + FREE (lock); + } + + return; +} + + +int +pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block, gf_lk_domain_t dom) +{ + int ret = 0; + + errno = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (pl_is_lock_grantable (pl_inode, lock, dom)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + __insert_and_merge (pl_inode, lock, dom); + } else if (can_block) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + lock->blocked = 1; + pl_insert_lock (pl_inode, lock, dom); + ret = -1; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + errno = EAGAIN; + ret = -1; + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_locks (this, pl_inode, dom); + + do_blocked_rw (pl_inode); + + return ret; +} + + +posix_lock_t * +pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +{ + posix_lock_t *conf = NULL; + + conf = first_overlap (pl_inode, lock, dom); + + if (conf == NULL) { + lock->fl_type = F_UNLCK; + return lock; + } + + return conf; +} |