From 334981987010f895594031f8363f481eb7ae6416 Mon Sep 17 00:00:00 2001 From: Pavan Vilas Sondur Date: Wed, 23 Sep 2009 06:03:25 +0000 Subject: Implemented inodelks with support for domains. Signed-off-by: Anand V. Avati BUG: 222 (Enhance Internal locks to support multilple domains and rewrite inodelks) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=222 --- xlators/features/locks/src/Makefile.am | 2 +- xlators/features/locks/src/common.c | 73 +++-- xlators/features/locks/src/common.h | 21 +- xlators/features/locks/src/inodelk.c | 548 +++++++++++++++++++++++++++++++++ xlators/features/locks/src/locks.h | 30 +- xlators/features/locks/src/posix.c | 45 ++- 6 files changed, 644 insertions(+), 75 deletions(-) create mode 100644 xlators/features/locks/src/inodelk.c (limited to 'xlators') diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am index 451e47edf..ab545cb1c 100644 --- a/xlators/features/locks/src/Makefile.am +++ b/xlators/features/locks/src/Makefile.am @@ -3,7 +3,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features locks_la_LDFLAGS = -module -avoidversion -locks_la_SOURCES = common.c posix.c entrylk.c +locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = locks.h common.h diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index c7137a82c..9b7cdac4d 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -38,11 +38,9 @@ static int -__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom); +__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock); static void -__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom); +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock); static pl_dom_list_t * allocate_domain(const char *volume) @@ -68,6 +66,7 @@ allocate_domain(const char *volume) INIT_LIST_HEAD (&dom->entrylk_list); INIT_LIST_HEAD (&dom->blocked_entrylks); INIT_LIST_HEAD (&dom->inodelk_list); + INIT_LIST_HEAD (&dom->blocked_inodelks); return dom; } @@ -116,6 +115,9 @@ pl_inode_get (xlator_t *this, inode_t *inode) goto out; } + gf_log ("posix-locks", GF_LOG_TRACE, + "Allocating new pl inode"); + st_mode = inode->st_mode; if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP)) pl_inode->mandatory = 1; @@ -125,7 +127,6 @@ pl_inode_get (xlator_t *this, inode_t *inode) INIT_LIST_HEAD (&pl_inode->dom_list); INIT_LIST_HEAD (&pl_inode->ext_list); - INIT_LIST_HEAD (&pl_inode->int_list); INIT_LIST_HEAD (&pl_inode->rw_list); ret = inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); @@ -196,7 +197,7 @@ posix_lock_to_flock (posix_lock_t *lock, struct flock *flock) /* Insert the lock into the inode's lock list */ static void -__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock) { list_add_tail (&lock->list, &pl_inode->ext_list); @@ -230,7 +231,7 @@ same_owner (posix_lock_t *l1, posix_lock_t *l2) /* Delete all F_UNLCK locks */ void -__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom) +__delete_unlck_locks (pl_inode_t *pl_inode) { posix_lock_t *l = NULL; posix_lock_t *tmp = NULL; @@ -328,14 +329,13 @@ subtract_locks (posix_lock_t *big, posix_lock_t *small) return v; } -/* +/* Start searching from {begin}, and return the first lock that conflicts, NULL if no conflict If {begin} is NULL, then start from the beginning of the list */ static posix_lock_t * -first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom) +first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) { posix_lock_t *l = NULL; @@ -354,8 +354,7 @@ first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock, /* Return true if lock is grantable */ static int -__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom) +__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) { posix_lock_t *l = NULL; int ret = 1; @@ -379,8 +378,7 @@ extern void do_blocked_rw (pl_inode_t *); static void -__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, - gf_lk_domain_t dom) +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) { posix_lock_t *conf = NULL; posix_lock_t *t = NULL; @@ -400,11 +398,11 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, sum->transport = lock->transport; sum->client_pid = lock->client_pid; - __delete_lock (pl_inode, conf); + __delete_lock (pl_inode, conf); __destroy_lock (conf); __destroy_lock (lock); - __insert_and_merge (pl_inode, sum, dom); + __insert_and_merge (pl_inode, sum); return; } else { @@ -415,7 +413,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, sum->client_pid = conf->client_pid; v = subtract_locks (sum, lock); - + __delete_lock (pl_inode, conf); __destroy_lock (conf); @@ -433,11 +431,11 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, continue; } __insert_and_merge (pl_inode, - v.locks[i], dom); + v.locks[i]); } - __delete_unlck_locks (pl_inode, dom); - return; + __delete_unlck_locks (pl_inode); + return; } } @@ -446,14 +444,14 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, } if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { - __insert_lock (pl_inode, lock, dom); + __insert_lock (pl_inode, lock); return; } } /* no conflicts, so just insert */ if (lock->fl_type != F_UNLCK) { - __insert_lock (pl_inode, lock, dom); + __insert_lock (pl_inode, lock); } else { __destroy_lock (lock); } @@ -461,8 +459,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, void -__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, - gf_lk_domain_t dom, struct list_head *granted) +__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted) { struct list_head tmp_list; posix_lock_t *l = NULL; @@ -473,7 +470,7 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { if (l->blocked) { - conf = first_overlap (pl_inode, l, dom); + conf = first_overlap (pl_inode, l); if (conf) continue; @@ -485,12 +482,12 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, list_for_each_entry_safe (l, tmp, &tmp_list, list) { list_del_init (&l->list); - if (__is_lock_grantable (pl_inode, l, dom)) { + if (__is_lock_grantable (pl_inode, l)) { conf = CALLOC (1, sizeof (*conf)); if (!conf) { l->blocked = 1; - __insert_lock (pl_inode, l, dom); + __insert_lock (pl_inode, l); continue; } @@ -506,19 +503,19 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, l->user_flock.l_start, l->user_flock.l_len); - __insert_and_merge (pl_inode, l, dom); + __insert_and_merge (pl_inode, l); list_add (&conf->list, granted); } else { l->blocked = 1; - __insert_lock (pl_inode, l, dom); + __insert_lock (pl_inode, l); } } } void -grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom) +grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode) { struct list_head granted_list; posix_lock_t *tmp = NULL; @@ -528,7 +525,7 @@ grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom) pthread_mutex_lock (&pl_inode->mutex); { - __grant_blocked_locks (this, pl_inode, dom, &granted_list); + __grant_blocked_locks (this, pl_inode, &granted_list); } pthread_mutex_unlock (&pl_inode->mutex); @@ -546,7 +543,7 @@ grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom) int pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block, gf_lk_domain_t dom) + int can_block) { int ret = 0; @@ -554,14 +551,14 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, pthread_mutex_lock (&pl_inode->mutex); { - if (__is_lock_grantable (pl_inode, lock, dom)) { + if (__is_lock_grantable (pl_inode, lock)) { gf_log (this->name, GF_LOG_TRACE, "%s (pid=%d) %"PRId64" - %"PRId64" => OK", lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, lock->user_flock.l_start, lock->user_flock.l_len); - __insert_and_merge (pl_inode, lock, dom); + __insert_and_merge (pl_inode, lock); } else if (can_block) { gf_log (this->name, GF_LOG_TRACE, "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked", @@ -570,7 +567,7 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, lock->user_flock.l_start, lock->user_flock.l_len); lock->blocked = 1; - __insert_lock (pl_inode, lock, dom); + __insert_lock (pl_inode, lock); ret = -1; } else { gf_log (this->name, GF_LOG_TRACE, @@ -585,7 +582,7 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, } pthread_mutex_unlock (&pl_inode->mutex); - grant_blocked_locks (this, pl_inode, dom); + grant_blocked_locks (this, pl_inode); do_blocked_rw (pl_inode); @@ -594,11 +591,11 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, posix_lock_t * -pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock) { posix_lock_t *conf = NULL; - conf = first_overlap (pl_inode, lock, dom); + conf = first_overlap (pl_inode, lock); if (conf == NULL) { lock->fl_type = F_UNLCK; diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 93da622ca..b082090e1 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -27,14 +27,14 @@ pl_inode_t * pl_inode_get (xlator_t *this, inode_t *inode); posix_lock_t * -pl_getlk (pl_inode_t *inode, posix_lock_t *lock, gf_lk_domain_t domain); +pl_getlk (pl_inode_t *inode, posix_lock_t *lock); int pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, - int can_block, gf_lk_domain_t domain); + int can_block); void -grant_blocked_locks (xlator_t *this, pl_inode_t *inode, gf_lk_domain_t domain); +grant_blocked_locks (xlator_t *this, pl_inode_t *inode); void posix_lock_to_flock (posix_lock_t *lock, struct flock *flock); @@ -49,11 +49,20 @@ void __delete_lock (pl_inode_t *, posix_lock_t *); void __destroy_lock (posix_lock_t *); +pl_dom_list_t * +get_domain (pl_inode_t *pl_inode, const char *volume); + +void +grant_blocked_inode_locks (pl_inode_t *pl_inode, pl_inode_lock_t *lock, pl_dom_list_t *dom); + +void +__delete_inode_lock (pl_inode_lock_t *lock); + +void +__destroy_inode_lock (pl_inode_lock_t *lock); + void grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, pl_entry_lock_t *unlocked, pl_dom_list_t *dom); -pl_dom_list_t * -get_domain (pl_inode_t *pl_inode, const char *volume); - #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c new file mode 100644 index 000000000..c802d2d6e --- /dev/null +++ b/xlators/features/locks/src/inodelk.c @@ -0,0 +1,548 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +void + __delete_inode_lock (pl_inode_lock_t *lock) +{ + list_del (&lock->list); +} + +void + __destroy_inode_lock (pl_inode_lock_t *lock) +{ + FREE (lock); +} + +/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */ +static int +inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK) + return 1; + + return 0; +} + +/* Determine if the two inodelks overlap reach other's lock regions */ +static int +inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); +} + +/* Returns true if the 2 inodelks have the same owner */ +static int same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + return ((l1->client_pid == l2->client_pid) && + (l1->transport == l2->transport)); +} + +/* Returns true if the 2 inodelks conflict with each other */ +static int +inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + if (same_inodelk_owner (l1, l2)) + return 0; + + if (!inodelk_overlap (l1, l2)) + return 0; + + return (inodelk_type_conflict(l1, l2)); +} + +/* Determine if lock is grantable or not */ +static pl_inode_lock_t * +__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock) +{ + pl_inode_lock_t *l = NULL; + pl_inode_lock_t *ret = NULL; + if (list_empty (&dom->inodelk_list)) + goto out; + list_for_each_entry (l, &dom->inodelk_list, list){ + if (inodelk_conflict (lock, l)) { + ret = l; + goto out; + } + } +out: + return ret; +} + +static pl_inode_lock_t * +__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock) +{ + pl_inode_lock_t *l = NULL; + pl_inode_lock_t *ret = NULL; + + if (list_empty (&dom->blocked_entrylks)) + return NULL; + + list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) { + if (inodelk_conflict (lock, l)) { + ret = l; + goto out; + } + } + + out: + return ret; +} + +static int +__owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock) +{ + pl_inode_lock_t *lock = NULL; + + list_for_each_entry (lock, &dom->entrylk_list, list) { + if (same_inodelk_owner (lock, newlock)) + return 1; + } + + list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + if (same_inodelk_owner (lock, newlock)) + return 1; + } + + return 0; +} + + +/* Determines if lock can be granted and adds the lock. If the lock + * is blocking, adds it to the blocked_inodelks list of the domain. + */ +static int +__lock_inodelk (pl_inode_t *pl_inode, pl_inode_lock_t *lock, + int can_block, pl_dom_list_t *dom) +{ + pl_inode_lock_t *conf = NULL; + int ret = -EINVAL; + + conf = __inodelk_grantable (dom, lock); + if (conf){ + ret = -EAGAIN; + if (can_block == 0) + goto out; + + list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); + + gf_log ("posix-locks", GF_LOG_TRACE, + "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + + + goto out; + } + + if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) { + ret = -EAGAIN; + if (can_block == 0) + goto out; + + list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); + + gf_log ("posix-locks", GF_LOG_TRACE, + "Lock is grantable, but blocking to prevent starvation"); + gf_log ("posix-locks", GF_LOG_TRACE, + "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + + + goto out; + } + list_add (&lock->list, &dom->inodelk_list); + + ret = 0; + +out: + return ret; +} + +/* Return true if the two inodelks have exactly same lock boundaries */ +static int +inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +{ + if ((l1->fl_start == l2->fl_start) && + (l1->fl_end == l2->fl_end)) + return 1; + + return 0; +} + + +static pl_inode_lock_t * +find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom) +{ + pl_inode_lock_t *l = NULL; + list_for_each_entry (l, &dom->inodelk_list, list) { + if (inodelks_equal (l, lock)) + return l; + } + return NULL; +} + +/* Set F_UNLCK removes a lock which has the exact same lock boundaries + * as the UNLCK lock specifies. If such a lock is not found, returns invalid + */ +static pl_inode_lock_t * +__inode_unlock_lock (pl_inode_lock_t *lock, pl_dom_list_t *dom) +{ + + pl_inode_lock_t *conf = NULL; + + conf = find_matching_inodelk (lock, dom); + if (!conf) { + gf_log ("posix-locks", GF_LOG_DEBUG, + " Matching lock not found for unlock"); + goto out; + } + __delete_inode_lock (conf); + gf_log ("posix-locks", GF_LOG_DEBUG, + " Matching lock found for unlock"); + __destroy_inode_lock (lock); + + +out: + return conf; + + +} +static void +__grant_blocked_inode_locks (pl_inode_t *pl_inode, pl_dom_list_t *dom) +{ + int bl_ret = 0; + pl_inode_lock_t *bl = NULL; + pl_inode_lock_t *tmp = NULL; + + list_for_each_entry_safe (bl, tmp, &dom->blocked_inodelks, blocked_locks) { + + if (__inodelk_grantable (dom, bl)) + continue; + + list_del_init (&bl->blocked_locks); + + bl_ret = __lock_inodelk (pl_inode, bl, 1, dom); + + if (bl_ret == 0) { + gf_log ("posix-locks", GF_LOG_TRACE, + "%s (pid=%d) %"PRId64" - %"PRId64" => Granted", + bl->fl_type == F_UNLCK ? "Unlock" : "Lock", + bl->client_pid, + bl->user_flock.l_start, + bl->user_flock.l_len); + + STACK_UNWIND (bl->frame, 0, 0, &bl->user_flock); + } + } + return; +} + +/* Grant all inodelks blocked on a lock */ +void +grant_blocked_inode_locks (pl_inode_t *pl_inode, pl_inode_lock_t *lock, pl_dom_list_t *dom) +{ + + if (list_empty (&dom->blocked_inodelks)) { + return; + } + + + __grant_blocked_inode_locks (pl_inode, dom); + __destroy_inode_lock (lock); + +} + +/* Release all inodelks from this transport */ +static int +release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom, + inode_t *inode, transport_t *trans) +{ + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *l = NULL; + + pl_inode_t * pinode = NULL; + + struct list_head granted; + + char *path = NULL; + + INIT_LIST_HEAD (&granted); + + pinode = pl_inode_get (this, inode); + + pthread_mutex_lock (&pinode->mutex); + { + if (list_empty (&dom->inodelk_list)) { + goto unlock; + } + + list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) { + if (l->transport != trans) + continue; + + list_del_init (&l->list); + + grant_blocked_inode_locks (pinode, l, dom); + + __delete_inode_lock (l); + + inode_path (inode, NULL, &path); + + gf_log (this->name, GF_LOG_TRACE, + "releasing lock on %s held by " + "{transport=%p, pid=%"PRId64"}", + path, trans, + (uint64_t) l->client_pid); + + if (path) + FREE (path); + + } + } +unlock: + pthread_mutex_unlock (&pinode->mutex); + + return 0; +} + + +static int +pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + int can_block, pl_dom_list_t *dom) +{ + int ret = -EINVAL; + pl_inode_lock_t *retlock = NULL; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (lock->fl_type != F_UNLCK) { + ret = __lock_inodelk (pl_inode, lock, can_block, dom); + if (ret == 0) + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->fl_start, + lock->fl_end); + + if (ret == -EAGAIN) + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + + goto out; + } + + + retlock = __inode_unlock_lock (lock, dom); + if (!retlock) { + gf_log (this->name, GF_LOG_DEBUG, + "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; + } + + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + ret = 0; + + grant_blocked_inode_locks (pl_inode, retlock, dom); + } +out: + pthread_mutex_unlock (&pl_inode->mutex); + return ret; +} + +/* Create a new inode_lock_t */ +pl_inode_lock_t * +new_inode_lock (struct flock *flock, transport_t *transport, pid_t client_pid, const char *volume) +{ + pl_inode_lock_t *lock = NULL; + + lock = CALLOC (1, sizeof (*lock)); + if (!lock) { + return NULL; + } + + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; + + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->transport = transport; + lock->client_pid = client_pid; + lock->volume = volume; + + INIT_LIST_HEAD (&lock->list); + INIT_LIST_HEAD (&lock->blocked_locks); + + return lock; +} + +/* Common inodelk code called form pl_inodelk and pl_finodelk */ +int +pl_common_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, inode_t *inode, int32_t cmd, struct flock *flock) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + int can_block = 0; + transport_t * transport = NULL; + pid_t client_pid = -1; + pl_inode_t * pinode = NULL; + pl_inode_lock_t * reqlock = NULL; + pl_dom_list_t * dom = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (inode, out); + VALIDATE_OR_GOTO (flock, out); + + if ((flock->l_start < 0) || (flock->l_len < 0)) { + op_errno = EINVAL; + goto unwind; + } + + transport = frame->root->trans; + client_pid = frame->root->pid; + + pinode = pl_inode_get (this, inode); + if (!pinode) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + op_errno = ENOMEM; + goto unwind; + } + + dom = get_domain (pinode, volume); + + if (client_pid == 0) { + /* + special case: this means release all locks + from this transport + */ + gf_log (this->name, GF_LOG_TRACE, + "Releasing all locks from transport %p", transport); + + release_inode_locks_of_transport (this, dom, inode, transport); + goto unwind; + } + + reqlock = new_inode_lock (flock, transport, client_pid, volume); + if (!reqlock) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + switch (cmd) { + case F_SETLKW: + can_block = 1; + reqlock->frame = frame; + reqlock->this = this; + + /* fall through */ + + case F_SETLK: + memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); + ret = pl_inode_setlk (this, pinode, reqlock, + can_block, dom); + + if (ret < 0) { + if (can_block) + goto out; + + gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN"); + op_errno = -ret; + __destroy_inode_lock (reqlock); + goto unwind; + } + break; + + default: + op_errno = ENOTSUP; + gf_log (this->name, GF_LOG_DEBUG, + "Lock command F_GETLK not supported for [f]inodelk " + "(cmd=%d)", + cmd); + goto unwind; + } + + op_ret = 0; + +unwind: + STACK_UNWIND (frame, op_ret, op_errno); +out: + return 0; +} + +int +pl_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, struct flock *flock) +{ + + pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock); + + return 0; +} + +int +pl_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, struct flock *flock) +{ + + pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock); + + return 0; + +} diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index e427637bc..76d006cd5 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -54,6 +54,30 @@ struct __posix_lock { }; typedef struct __posix_lock posix_lock_t; +struct __pl_inode_lock { + struct list_head list; + struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */ + + short fl_type; + off_t fl_start; + off_t fl_end; + + const char *volume; + + struct flock user_flock; /* the flock supplied by the user */ + xlator_t *this; /* required for blocked locks */ + fd_t *fd; + + call_frame_t *frame; + + /* These two together serve to uniquely identify each process + across nodes */ + + transport_t *transport; /* to identify client node */ + pid_t client_pid; /* pid of client process */ +}; +typedef struct __pl_inode_lock pl_inode_lock_t; + struct __pl_rw_req_t { struct list_head list; call_stub_t *stub; @@ -67,6 +91,7 @@ struct __pl_dom_list_t { struct list_head entrylk_list; /* List of entry locks */ struct list_head blocked_entrylks; /* List of all blocked entrylks */ struct list_head inodelk_list; /* List of inode locks */ + struct list_head blocked_inodelks; /* List of all blocked inodelks */ }; typedef struct __pl_dom_list_t pl_dom_list_t; @@ -96,17 +121,12 @@ struct __pl_inode { struct list_head dom_list; /* list of domains */ struct list_head ext_list; /* list of fcntl locks */ - struct list_head int_list; /* list of internal locks */ struct list_head rw_list; /* list of waiting r/w requests */ int mandatory; /* if mandatory locking is enabled */ }; typedef struct __pl_inode pl_inode_t; -#define LOCKS_FOR_DOMAIN(inode,domain) (domain == GF_LOCK_POSIX \ - ? inode->fcntl_locks \ - : inode->inodelk_locks) - struct __pl_fd { gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */ }; diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index cbad7844e..e7e4ba300 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -248,14 +248,6 @@ __delete_locks_of_owner (pl_inode_t *pl_inode, } } - list_for_each_entry_safe (l, tmp, &pl_inode->int_list, list) { - if ((l->transport == transport) - && (l->client_pid == pid)) { - __delete_lock (pl_inode, l); - __destroy_lock (l); - } - } - return; } @@ -294,8 +286,7 @@ pl_flush (call_frame_t *frame, xlator_t *this, } pthread_mutex_unlock (&pl_inode->mutex); - grant_blocked_locks (this, pl_inode, GF_LOCK_POSIX); - grant_blocked_locks (this, pl_inode, GF_LOCK_INTERNAL); + grant_blocked_locks (this, pl_inode); do_blocked_rw (pl_inode); @@ -651,7 +642,7 @@ pl_lk (call_frame_t *frame, xlator_t *this, case F_GETLK64: #endif case F_GETLK: - conf = pl_getlk (pl_inode, reqlock, GF_LOCK_POSIX); + conf = pl_getlk (pl_inode, reqlock); posix_lock_to_flock (conf, flock); __destroy_lock (reqlock); @@ -674,7 +665,7 @@ pl_lk (call_frame_t *frame, xlator_t *this, case F_SETLK: memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); ret = pl_setlk (this, pl_inode, reqlock, - can_block, GF_LOCK_POSIX); + can_block); if (ret == -1) { if (can_block) @@ -700,12 +691,12 @@ pl_forget (xlator_t *this, inode_t *inode) { pl_inode_t *pl_inode = NULL; - + posix_lock_t *ext_tmp = NULL; posix_lock_t *ext_l = NULL; - posix_lock_t *int_tmp = NULL; - posix_lock_t *int_l = NULL; + pl_inode_lock_t *ino_tmp = NULL; + pl_inode_lock_t *ino_l = NULL; pl_rw_req_t *rw_tmp = NULL; pl_rw_req_t *rw_req = NULL; @@ -742,19 +733,19 @@ pl_forget (xlator_t *this, } } - if (!list_empty (&pl_inode->int_list)) { - gf_log (this->name, GF_LOG_DEBUG, - "Pending inode locks found, releasing."); - list_for_each_entry_safe (int_l, int_tmp, &pl_inode->int_list, - list) { + list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) { - __delete_lock (pl_inode, int_l); - __destroy_lock (int_l); - } - } + if (!list_empty (&dom->inodelk_list)) { + gf_log (this->name, GF_LOG_WARNING, + "Pending inode locks found, releasing."); - list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) { + list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) { + __delete_inode_lock (ino_l); + grant_blocked_inode_locks (pl_inode, ino_l, dom); + } + + } if (!list_empty (&dom->entrylk_list)) { gf_log (this->name, GF_LOG_WARNING, "Pending entry locks found, releasing."); @@ -762,13 +753,17 @@ pl_forget (xlator_t *this, list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) { list_del_init (&entry_l->domain_list); grant_blocked_entry_locks (this, pl_inode, entry_l, dom); + if (entry_l->basename) FREE (entry_l->basename); FREE (entry_l); } } + list_del (&dom->inode_list); + gf_log ("posix-locks", GF_LOG_TRACE, + " Cleaning up domain: %s", dom->domain); FREE (dom->domain); FREE (dom); } -- cgit