From 231196910d9d36af9546ddc511b26da5628b3ab8 Mon Sep 17 00:00:00 2001 From: Pavan Vilas Sondur Date: Wed, 23 Sep 2009 06:02:55 +0000 Subject: Implemented entry locks and support for domains. Signed-off-by: Anand V. Avati BUG: 222 (Enhance Internal locks to support multiple domains and rewrite inodelks) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=222 --- xlators/features/locks/src/Makefile.am | 2 +- xlators/features/locks/src/common.c | 53 ++- xlators/features/locks/src/common.h | 7 + xlators/features/locks/src/entrylk.c | 577 +++++++++++++++++++++++++++++++++ xlators/features/locks/src/locks.h | 24 +- xlators/features/locks/src/posix.c | 44 ++- 6 files changed, 681 insertions(+), 26 deletions(-) create mode 100644 xlators/features/locks/src/entrylk.c diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am index f59ec6eb..451e47ed 100644 --- a/xlators/features/locks/src/Makefile.am +++ b/xlators/features/locks/src/Makefile.am @@ -3,7 +3,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features locks_la_LDFLAGS = -module -avoidversion -locks_la_SOURCES = common.c posix.c +locks_la_SOURCES = common.c posix.c entrylk.c locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = locks.h common.h diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 2a393e7d..c7137a82 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -44,6 +44,57 @@ static void __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom); +static pl_dom_list_t * +allocate_domain(const char *volume) +{ + pl_dom_list_t *dom = NULL; + + dom = CALLOC (1, sizeof (*dom)); + if (!dom) + return NULL; + + + dom->domain = strdup(volume); + if (!dom->domain) { + gf_log ("posix-locks", GF_LOG_TRACE, + "Out of Memory"); + return NULL; + } + + gf_log ("posix-locks", GF_LOG_TRACE, + "New domain allocated: %s", 
dom->domain); + + INIT_LIST_HEAD (&dom->inode_list); + INIT_LIST_HEAD (&dom->entrylk_list); + INIT_LIST_HEAD (&dom->blocked_entrylks); + INIT_LIST_HEAD (&dom->inodelk_list); + + return dom; +} + +/* Returns domain for the lock. If domain is not present, + * allocates a domain and returns it + */ +pl_dom_list_t * +get_domain (pl_inode_t *pl_inode, const char *volume) +{ + pl_dom_list_t *dom = NULL; + + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + if (strcmp (dom->domain, volume) == 0) + goto found; + + + } + + dom = allocate_domain(volume); + + if (dom) + list_add (&dom->inode_list, &pl_inode->dom_list); +found: + + return dom; +} pl_inode_t * pl_inode_get (xlator_t *this, inode_t *inode) @@ -72,7 +123,7 @@ pl_inode_get (xlator_t *this, inode_t *inode) pthread_mutex_init (&pl_inode->mutex, NULL); - INIT_LIST_HEAD (&pl_inode->dir_list); + INIT_LIST_HEAD (&pl_inode->dom_list); INIT_LIST_HEAD (&pl_inode->ext_list); INIT_LIST_HEAD (&pl_inode->int_list); INIT_LIST_HEAD (&pl_inode->rw_list); diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index ee17b008..93da622c 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -49,4 +49,11 @@ void __delete_lock (pl_inode_t *, posix_lock_t *); void __destroy_lock (posix_lock_t *); +void +grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_entry_lock_t *unlocked, pl_dom_list_t *dom); + +pl_dom_list_t * +get_domain (pl_inode_t *pl_inode, const char *volume); + #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c new file mode 100644 index 00000000..0ddc9532 --- /dev/null +++ b/xlators/features/locks/src/entrylk.c @@ -0,0 +1,577 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +static pl_entry_lock_t * +new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, + transport_t *trans, pid_t client_pid, const char *volume) +{ + pl_entry_lock_t *newlock = NULL; + + newlock = CALLOC (1, sizeof (pl_entry_lock_t)); + if (!newlock) { + goto out; + } + + newlock->basename = basename ? strdup (basename) : NULL; + newlock->type = type; + newlock->trans = trans; + newlock->volume = volume; + newlock->client_pid = client_pid; + + + INIT_LIST_HEAD (&newlock->domain_list); + INIT_LIST_HEAD (&newlock->blocked_locks); + +out: + return newlock; +} + + +/** + * all_names - does a basename represent all names? + * @basename: name to check + */ + +#define all_names(basename) ((basename == NULL) ? 1 : 0) + +/** + * names_conflict - do two names conflict? 
+ * @n1: name + * @n2: name + */ + +static int +names_conflict (const char *n1, const char *n2) +{ + return all_names (n1) || all_names (n2) || !strcmp (n1, n2); +} + + +static int +__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2) +{ + return ((l1->client_pid == l2->client_pid) && + (l1->trans == l2->trans)); +} + + +/** + * __lock_grantable - is this lock grantable? + * @dom: domain in which to look + * @basename: name we're trying to lock + * @type: type of lock + */ +static pl_entry_lock_t * +__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + + if (list_empty (&dom->entrylk_list)) + return NULL; + + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (names_conflict (lock->basename, basename)) + return lock; + } + + return NULL; +} + +static pl_entry_lock_t * +__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + + if (list_empty (&dom->blocked_entrylks)) + return NULL; + + list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + if (names_conflict (lock->basename, basename)) + return lock; + } + + return NULL; +} + +static int +__owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock) +{ + pl_entry_lock_t *lock = NULL; + + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (__same_entrylk_owner (lock, newlock)) + return 1; + } + + list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + if (__same_entrylk_owner (lock, newlock)) + return 1; + } + + return 0; +} + +static int +names_equal (const char *n1, const char *n2) +{ + return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); +} + +/** + * __find_most_matching_lock - find the lock struct which most matches in order of: + * lock on the exact basename || + * an all_names lock + * + * + * @dom: domain in which to look + * @basename: name to search for + */ + +static pl_entry_lock_t * 
+__find_most_matching_lock (pl_dom_list_t *dom, const char *basename) +{ + pl_entry_lock_t *lock; + pl_entry_lock_t *all = NULL; + pl_entry_lock_t *exact = NULL; + + if (list_empty (&dom->entrylk_list)) + return NULL; + + list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + if (all_names (lock->basename)) + all = lock; + else if (names_equal (lock->basename, basename)) + exact = lock; + } + + return (exact ? exact : all); +} + +/** + * __lock_name - lock a name in a directory + * @pinode: inode for the directory in which to lock + * @basename: name of the entry to lock + * if null, lock the entire directory + * + * the entire directory being locked is represented as: a single + * pl_entry_lock_t present in the domain's entrylk_list with its + * basename = NULL + */ + +int +__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, + call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, int nonblock) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *conf = NULL; + transport_t *trans = NULL; + pid_t client_pid = 0; + + int ret = -EINVAL; + + trans = frame->root->trans; + client_pid = frame->root->pid; + + lock = new_entrylk_lock (pinode, basename, type, trans, client_pid, dom->domain); + if (!lock) { + ret = -ENOMEM; + goto out; + } + + conf = __lock_grantable (dom, basename, type); + if (conf) { + ret = -EAGAIN; + if (nonblock) + goto out; + + lock->frame = frame; + lock->this = this; + + list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); + + gf_log (this->name, GF_LOG_TRACE, + "Blocking lock: {pinode=%p, basename=%s}", + pinode, basename); + + goto out; + } + + if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) { + ret = -EAGAIN; + if (nonblock) + goto out; + lock->frame = frame; + lock->this = this; + + list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); + + gf_log (this->name, GF_LOG_TRACE, + "Lock is grantable, but blocking to prevent starvation"); + gf_log (this->name, 
GF_LOG_TRACE, + "Blocking lock: {pinode=%p, basename=%s}", + pinode, basename); + + goto out; + } + switch (type) { + + case ENTRYLK_WRLCK: + list_add (&lock->domain_list, &dom->entrylk_list); + break; + + default: + + gf_log (this->name, GF_LOG_DEBUG, + "Invalid type for entrylk specified: %d", type); + ret = -EINVAL; + goto out; + } + + ret = 0; +out: + return ret; +} + +/** + * __unlock_name - unlock a name in a directory + * @dom: lock domain of the directory to unlock in + * @basename: name of the entry to unlock + * if null, unlock the entire directory + */ + +pl_entry_lock_t * +__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *ret_lock = NULL; + + lock = __find_most_matching_lock (dom, basename); + + if (!lock) { + gf_log ("locks", GF_LOG_DEBUG, + "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found", + basename); + goto out; + } + + if (names_equal (lock->basename, basename) + && lock->type == type) { + + if (type == ENTRYLK_WRLCK) { + list_del (&lock->domain_list); + ret_lock = lock; + } + } else { + gf_log ("locks", GF_LOG_DEBUG, + "Unlock for a non-existing lock!"); + goto out; + } + +out: + return ret_lock; +} + + +void +__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct list_head *granted) +{ + int bl_ret = 0; + pl_entry_lock_t *bl = NULL; + pl_entry_lock_t *tmp = NULL; + + list_for_each_entry_safe (bl, tmp, &dom->blocked_entrylks, + blocked_locks) { + + if (__lock_grantable (dom, bl->basename, bl->type)) + continue; + + list_del_init (&bl->blocked_locks); + + /* TODO: error checking */ + + gf_log ("locks", GF_LOG_TRACE, + "Trying to unblock: {pinode=%p, basename=%s}", + pl_inode, bl->basename); + + bl_ret = __lock_name (pl_inode, bl->basename, bl->type, + bl->frame, dom, bl->this, 0); + + if (bl_ret == 0) { + list_add (&bl->blocked_locks, granted); + } else { + if (bl->basename) + FREE (bl->basename); + FREE 
(bl); + } + } + return; +} + +/* Grants, where possible, the locks that were blocked, then frees the unlocked lock */ +void +grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_entry_lock_t *unlocked, pl_dom_list_t *dom) +{ + struct list_head granted_list; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lock = NULL; + + INIT_LIST_HEAD (&granted_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_entry_locks (this, pl_inode, dom, &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { + list_del_init (&lock->blocked_locks); + + STACK_UNWIND (lock->frame, 0, 0); + + FREE (lock->basename); + FREE (lock); + } + + FREE (unlocked->basename); + FREE (unlocked); + + return; +} + +/** + * release_entry_locks_for_transport: release all entry locks from this + * transport for this inode and domain + */ + +static int +release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode, + pl_dom_list_t *dom, transport_t *trans) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *tmp = NULL; + struct list_head granted; + + INIT_LIST_HEAD (&granted); + + pthread_mutex_lock (&pinode->mutex); + { + if (list_empty (&dom->entrylk_list)) { + goto unlock; + } + + list_for_each_entry_safe (lock, tmp, &dom->entrylk_list, + domain_list) { + if (lock->trans != trans) + continue; + + list_del_init (&lock->domain_list); + + gf_log (this->name, GF_LOG_TRACE, + "releasing lock on held by " + "{transport=%p}",trans);; + + FREE (lock->basename); + FREE (lock); + } + + __grant_blocked_entry_locks (this, pinode, dom, &granted); + + } +unlock: + pthread_mutex_unlock (&pinode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + list_del_init (&lock->blocked_locks); + + STACK_UNWIND (lock->frame, 0, 0); + + if (lock->basename) + FREE (lock->basename); + FREE (lock); + } + + return 0; +} + +/* Common entrylk code called by pl_entrylk and pl_fentrylk */ +int +pl_common_entrylk (call_frame_t 
*frame, xlator_t *this, + const char *volume, inode_t *inode, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + + transport_t * transport = NULL; + pid_t pid = -1; + + pl_inode_t * pinode = NULL; + int ret = -1; + pl_entry_lock_t *unlocked = NULL; + char unwind = 1; + + pl_dom_list_t *dom = NULL; + + pinode = pl_inode_get (this, inode); + if (!pinode) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + op_errno = ENOMEM; + goto out; + } + + dom = get_domain (pinode, volume); + if (!dom){ + gf_log ("posix-locks", GF_LOG_ERROR, + "Out of memory"); + op_errno = ENOMEM; + goto out; + } + + pid = frame->root->pid; + transport = frame->root->trans; + + if (pid == 0) { + /* + this is a special case that means release + all locks from this transport + */ + + gf_log (this->name, GF_LOG_TRACE, + "Releasing locks for transport %p", transport); + + release_entry_locks_for_transport (this, pinode, dom, transport); + op_ret = 0; + + goto out; + } + + switch (cmd) { + case ENTRYLK_LOCK: + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, dom, this, 0); + } + pthread_mutex_unlock (&pinode->mutex); + + if (ret < 0) { + if (ret == -EAGAIN) + unwind = 0; + op_errno = -ret; + goto out; + } + + break; + + case ENTRYLK_LOCK_NB: + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, dom, this, 1); + } + pthread_mutex_unlock (&pinode->mutex); + + if (ret < 0) { + op_errno = -ret; + goto out; + } + + break; + + case ENTRYLK_UNLOCK: + pthread_mutex_lock (&pinode->mutex); + { + unlocked = __unlock_name (dom, basename, type); + } + pthread_mutex_unlock (&pinode->mutex); + + if (unlocked) + grant_blocked_entry_locks (this, pinode, unlocked, dom); + + break; + + default: + gf_log (this->name, GF_LOG_ERROR, + "Unexpected case in entrylk (cmd=%d). 
Please file" + "a bug report at http://bugs.gluster.com", cmd); + goto out; + } + + op_ret = 0; +out: + if (unwind) { + STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + +/** + * pl_entrylk: + * + * Locking on names (directory entries) + */ + +int +pl_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + + pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, type); + + return 0; +} + + +/** + * pl_fentrylk: + * + * Locking on names (directory entries) + */ + +int +pl_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + + pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, type); + + return 0; +} diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 5a834657..e427637b 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -61,30 +61,40 @@ struct __pl_rw_req_t { }; typedef struct __pl_rw_req_t pl_rw_req_t; +struct __pl_dom_list_t { + const char *domain; + struct list_head inode_list; /* list_head back to pl_inode_t */ + struct list_head entrylk_list; /* List of entry locks */ + struct list_head blocked_entrylks; /* List of all blocked entrylks */ + struct list_head inodelk_list; /* List of inode locks */ +}; +typedef struct __pl_dom_list_t pl_dom_list_t; struct __entry_lock { - struct list_head inode_list; /* list_head back to pl_inode_t */ - struct list_head blocked_locks; /* locks blocked due to this lock */ + struct list_head domain_list; /* list_head back to pl_dom_list_t */ + struct list_head blocked_locks; /* list_head back to blocked_entrylks */ call_frame_t *frame; xlator_t *this; - int blocked; - + + const char *volume; + const char *basename; entrylk_type type; - unsigned int read_count; /* number of read locks */ + transport_t *trans; + pid_t client_pid; /* pid of 
client process */ }; typedef struct __entry_lock pl_entry_lock_t; -/* The "simulated" inode. This contains a list of all the locks associated +/* The "simulated" inode. This contains a list of all the locks associated with this file */ struct __pl_inode { pthread_mutex_t mutex; - struct list_head dir_list; /* list of entry locks */ + struct list_head dom_list; /* list of domains */ struct list_head ext_list; /* list of fcntl locks */ struct list_head int_list; /* list of internal locks */ struct list_head rw_list; /* list of waiting r/w requests */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 5514d441..cbad7844 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -713,15 +713,18 @@ pl_forget (xlator_t *this, pl_entry_lock_t *entry_tmp = NULL; pl_entry_lock_t *entry_l = NULL; + pl_dom_list_t *dom = NULL; + pl_dom_list_t *dom_tmp = NULL; + pl_inode = pl_inode_get (this, inode); if (!list_empty (&pl_inode->rw_list)) { gf_log (this->name, GF_LOG_DEBUG, "Pending R/W requests found, releasing."); - - list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list, + + list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list, list) { - + list_del (&rw_req->list); FREE (rw_req); } @@ -731,9 +734,9 @@ pl_forget (xlator_t *this, gf_log (this->name, GF_LOG_DEBUG, "Pending fcntl locks found, releasing."); - list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list, + list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list, list) { - + __delete_lock (pl_inode, ext_l); __destroy_lock (ext_l); } @@ -743,25 +746,32 @@ pl_forget (xlator_t *this, gf_log (this->name, GF_LOG_DEBUG, "Pending inode locks found, releasing."); - list_for_each_entry_safe (int_l, int_tmp, &pl_inode->int_list, + list_for_each_entry_safe (int_l, int_tmp, &pl_inode->int_list, list) { - + __delete_lock (pl_inode, int_l); __destroy_lock (int_l); } } - if (!list_empty (&pl_inode->dir_list)) { - gf_log (this->name, 
GF_LOG_DEBUG, - "Pending entry locks found, releasing."); - - list_for_each_entry_safe (entry_l, entry_tmp, - &pl_inode->dir_list, inode_list) { - - list_del (&entry_l->inode_list); - FREE (entry_l); + list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) { + if (!list_empty (&dom->entrylk_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending entry locks found, releasing."); + + list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) { + list_del_init (&entry_l->domain_list); + grant_blocked_entry_locks (this, pl_inode, entry_l, dom); + if (entry_l->basename) + FREE (entry_l->basename); + FREE (entry_l); + } + } - } + list_del (&dom->inode_list); + FREE (dom->domain); + FREE (dom); + } FREE (pl_inode); -- cgit