diff options
| -rw-r--r-- | xlators/features/locks/src/Makefile.am | 2 | ||||
| -rw-r--r-- | xlators/features/locks/src/common.c | 53 | ||||
| -rw-r--r-- | xlators/features/locks/src/common.h | 7 | ||||
| -rw-r--r-- | xlators/features/locks/src/entrylk.c | 577 | ||||
| -rw-r--r-- | xlators/features/locks/src/locks.h | 24 | ||||
| -rw-r--r-- | xlators/features/locks/src/posix.c | 44 | 
6 files changed, 681 insertions, 26 deletions
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am index f59ec6eba..451e47edf 100644 --- a/xlators/features/locks/src/Makefile.am +++ b/xlators/features/locks/src/Makefile.am @@ -3,7 +3,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features  locks_la_LDFLAGS = -module -avoidversion -locks_la_SOURCES = common.c posix.c +locks_la_SOURCES = common.c posix.c entrylk.c  locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la   noinst_HEADERS = locks.h common.h diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 2a393e7d7..c7137a82c 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -44,6 +44,57 @@ static void  __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,  		    gf_lk_domain_t dom); +static pl_dom_list_t * +allocate_domain(const char *volume) +{ +        pl_dom_list_t *dom = NULL; + +        dom = CALLOC (1, sizeof (*dom)); +        if (!dom) +                return NULL; + + +        dom->domain = strdup(volume); +	if (!dom->domain) { +		gf_log ("posix-locks", GF_LOG_TRACE, +			"Out of Memory"); +		return NULL; +	} + +        gf_log ("posix-locks", GF_LOG_TRACE, +                "New domain allocated: %s", dom->domain); + +        INIT_LIST_HEAD (&dom->inode_list); +        INIT_LIST_HEAD (&dom->entrylk_list); +        INIT_LIST_HEAD (&dom->blocked_entrylks); +        INIT_LIST_HEAD (&dom->inodelk_list); + +        return dom; +} + +/* Returns domain for the lock. If domain is not present, + * allocates a domain and returns it + */ +pl_dom_list_t * +get_domain (pl_inode_t *pl_inode, const char *volume) +{ +        pl_dom_list_t *dom = NULL; + +        list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { +                if (strcmp (dom->domain, volume) == 0) +                        goto found; + + +        } + +        dom = allocate_domain(volume); + +        if (dom) +                list_add (&dom->inode_list, &pl_inode->dom_list); +found: + +        return dom; +}  pl_inode_t *  pl_inode_get (xlator_t *this, inode_t *inode) @@ -72,7 +123,7 @@ pl_inode_get (xlator_t *this, inode_t *inode)  	pthread_mutex_init (&pl_inode->mutex, NULL); -	INIT_LIST_HEAD (&pl_inode->dir_list); +	INIT_LIST_HEAD (&pl_inode->dom_list);  	INIT_LIST_HEAD (&pl_inode->ext_list);  	INIT_LIST_HEAD (&pl_inode->int_list);  	INIT_LIST_HEAD (&pl_inode->rw_list); diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index ee17b0087..93da622ca 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -49,4 +49,11 @@ void __delete_lock (pl_inode_t *, posix_lock_t *);  void __destroy_lock (posix_lock_t *); +void +grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, +			   pl_entry_lock_t *unlocked, pl_dom_list_t *dom); + +pl_dom_list_t * +get_domain (pl_inode_t *pl_inode, const char *volume); +  #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c new file mode 100644 index 000000000..0ddc9532f --- /dev/null +++ b/xlators/features/locks/src/entrylk.c @@ -0,0 +1,577 @@ +/* +  Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +static pl_entry_lock_t * +new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, +		  transport_t *trans, pid_t client_pid, const char *volume) +{ +	pl_entry_lock_t *newlock = NULL; + +	newlock = CALLOC (1, sizeof (pl_entry_lock_t)); +	if (!newlock) { +		goto out; +	} + +	newlock->basename       = basename ? strdup (basename) : NULL; +	newlock->type           = type; +	newlock->trans          = trans; +	newlock->volume         = volume; +        newlock->client_pid     = client_pid; + + +	INIT_LIST_HEAD (&newlock->domain_list); +	INIT_LIST_HEAD (&newlock->blocked_locks); + +out: +	return newlock; +} + + +/** + * all_names - does a basename represent all names? + * @basename: name to check + */ + +#define all_names(basename) ((basename == NULL) ? 1 : 0) + +/** + * names_conflict - do two names conflict? + * @n1: name + * @n2: name + */ + +static int +names_conflict (const char *n1, const char *n2) +{ +	return all_names (n1) || all_names (n2) || !strcmp (n1, n2); +} + + +static int +__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2) +{ +        return ((l1->client_pid == l2->client_pid) && +		(l1->trans  == l2->trans)); +} + + +/** + * lock_grantable - is this lock grantable? + * @inode: inode in which to look + * @basename: name we're trying to lock + * @type: type of lock + */ +static pl_entry_lock_t * +__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ +	pl_entry_lock_t *lock = NULL; + +	if (list_empty (&dom->entrylk_list)) +		return NULL; + +	list_for_each_entry (lock, &dom->entrylk_list, domain_list) { +		if (names_conflict (lock->basename, basename)) +			return lock; +	} + +	return NULL; +} + +static pl_entry_lock_t * +__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ +	pl_entry_lock_t *lock = NULL; + +	if (list_empty (&dom->blocked_entrylks)) +		return NULL; + +	list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { +		if (names_conflict (lock->basename, basename)) +			return lock; +	} + +	return NULL; +} + +static int +__owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock) +{ +        pl_entry_lock_t *lock = NULL; + +	list_for_each_entry (lock, &dom->entrylk_list, domain_list) { +		if (__same_entrylk_owner (lock, newlock)) +			return 1; +	} + +	list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { +		if (__same_entrylk_owner (lock, newlock)) +			return 1; +	} + +	return 0; +} + +static int +names_equal (const char *n1, const char *n2) +{ +	return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); +} + +/** + * __find_most_matching_lock - find the lock struct which most matches in order of: + *                           lock on the exact basename || + *                           an all_names lock + * + * + * @inode: inode in which to look + * @basename: name to search for + */ + +static pl_entry_lock_t * +__find_most_matching_lock (pl_dom_list_t *dom, const char *basename) +{ +	pl_entry_lock_t *lock; +	pl_entry_lock_t *all = NULL; +	pl_entry_lock_t *exact = NULL; + +	if (list_empty (&dom->entrylk_list)) +		return NULL; + +	list_for_each_entry (lock, &dom->entrylk_list, domain_list) { +		if (all_names (lock->basename)) +			all = lock; +		else if (names_equal (lock->basename, basename)) +			exact = lock; +	} + +	return (exact ? exact : all); +} + +/** + * __lock_name - lock a name in a directory + * @inode: inode for the directory in which to lock + * @basename: name of the entry to lock + *            if null, lock the entire directory + * + * the entire directory being locked is represented as: a single + * pl_entry_lock_t present in the entrylk_locks list with its + * basename = NULL + */ + +int +__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, +	     call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, int nonblock) +{ +	pl_entry_lock_t *lock    = NULL; +	pl_entry_lock_t *conf    = NULL; +	transport_t     *trans   = NULL; +        pid_t        client_pid = 0; + +	int ret = -EINVAL; + +	trans = frame->root->trans; +        client_pid = frame->root->pid; + +	lock = new_entrylk_lock (pinode, basename, type, trans, client_pid, dom->domain); +	if (!lock) { +		ret = -ENOMEM; +		goto out; +	} + +	conf = __lock_grantable (dom, basename, type); +	if (conf) { +		ret = -EAGAIN; +		if (nonblock) +			goto out; + +		lock->frame   = frame; +		lock->this    = this; + +		list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); + +		gf_log (this->name, GF_LOG_TRACE, +			"Blocking lock: {pinode=%p, basename=%s}", +			pinode, basename); + +		goto out; +	} + +        if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) { +                ret = -EAGAIN; +                if (nonblock) +                        goto out; +                lock->frame     = frame; +                lock->this      = this; + +                list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); + +                gf_log (this->name, GF_LOG_TRACE, +                        "Lock is grantable, but blocking to prevent starvation"); +		gf_log (this->name, GF_LOG_TRACE, +			"Blocking lock: {pinode=%p, basename=%s}", +			pinode, basename); + +		goto out; +        } +        switch (type) { + +	case ENTRYLK_WRLCK: +		list_add (&lock->domain_list, &dom->entrylk_list); +		break; + +        default: + +          gf_log (this->name, GF_LOG_DEBUG, +                  "Invalid type for entrylk specified: %d", type); +          ret = -EINVAL; +          goto out; +	} + +	ret = 0; +out: +	return ret; +} + +/** + * __unlock_name - unlock a name in a directory + * @inode: inode for the directory to unlock in + * @basename: name of the entry to unlock + *            if null, unlock the entire directory + */ + +pl_entry_lock_t * +__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type) +{ +	pl_entry_lock_t *lock = NULL; +	pl_entry_lock_t *ret_lock = NULL; + +	lock = __find_most_matching_lock (dom, basename); + +	if (!lock) { +		gf_log ("locks", GF_LOG_DEBUG, +			"unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found", +			basename); +		goto out; +	} + +	if (names_equal (lock->basename, basename) +	    && lock->type == type) { + +		if (type == ENTRYLK_WRLCK) { +			list_del (&lock->domain_list); +			ret_lock = lock; +		} +	} else { +		gf_log ("locks", GF_LOG_DEBUG, +			"Unlock for a non-existing lock!"); +		goto out; +	} + +out: +	return ret_lock; +} + + +void +__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, +			     pl_dom_list_t *dom, struct list_head *granted) +{ +	int              bl_ret = 0; +	pl_entry_lock_t *bl   = NULL; +	pl_entry_lock_t *tmp  = NULL; + +	list_for_each_entry_safe (bl, tmp, &dom->blocked_entrylks, +				  blocked_locks) { + +                if (__lock_grantable (dom, bl->basename, bl->type)) +                        continue; + +		list_del_init (&bl->blocked_locks); + +		/* TODO: error checking */ + +		gf_log ("locks", GF_LOG_TRACE, +			"Trying to unblock: {pinode=%p, basename=%s}", +			pl_inode, bl->basename); + +		bl_ret = __lock_name (pl_inode, bl->basename, bl->type, +				      bl->frame, dom, bl->this, 0); + +		if (bl_ret == 0) { +			list_add (&bl->blocked_locks, granted); +		} else { +			if (bl->basename) +				FREE (bl->basename); +			FREE (bl); +		} +	} +	return; +} + +/* Grants locks if possible which are blocked on a lock */ +void +grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, +			   pl_entry_lock_t *unlocked, pl_dom_list_t *dom) +{ +	struct list_head  granted_list; +	pl_entry_lock_t  *tmp = NULL; +	pl_entry_lock_t  *lock = NULL; + +	INIT_LIST_HEAD (&granted_list); + +	pthread_mutex_lock (&pl_inode->mutex); +	{ +		__grant_blocked_entry_locks (this, pl_inode, dom, &granted_list); +	} +	pthread_mutex_unlock (&pl_inode->mutex); + +	list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { +		list_del_init (&lock->blocked_locks); + +		STACK_UNWIND (lock->frame, 0, 0); + +		FREE (lock->basename); +		FREE (lock); +	} + +	FREE (unlocked->basename); +	FREE (unlocked); + +	return; +} + +/** + * release_entry_locks_for_transport: release all entry locks from this + * transport for this loc_t + */ + +static int +release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode, +				   pl_dom_list_t *dom, transport_t *trans) +{ +	pl_entry_lock_t  *lock = NULL; +	pl_entry_lock_t  *tmp = NULL; +	struct list_head  granted; + +	INIT_LIST_HEAD (&granted); + +	pthread_mutex_lock (&pinode->mutex); +	{ +		if (list_empty (&dom->entrylk_list)) { +			goto unlock; +		} + +		list_for_each_entry_safe (lock, tmp, &dom->entrylk_list, +					  domain_list) { +			if (lock->trans != trans) +				continue; + +			list_del_init (&lock->domain_list); + +			gf_log (this->name, GF_LOG_TRACE, +                                "releasing lock on  held by " +                                "{transport=%p}",trans);; + +			FREE (lock->basename); +			FREE (lock); +		} + +		__grant_blocked_entry_locks (this, pinode, dom, &granted); + +	} +unlock: +	pthread_mutex_unlock (&pinode->mutex); + +	list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { +		list_del_init (&lock->blocked_locks); + +		STACK_UNWIND (lock->frame, 0, 0); + +		if (lock->basename) +			FREE (lock->basename); +		FREE (lock); +	} + +	return 0; +} + +/* Common entrylk code called by pl_entrylk and pl_fentrylk */ +int +pl_common_entrylk (call_frame_t *frame, xlator_t *this, +	    const char *volume, inode_t *inode, const char *basename, +	    entrylk_cmd cmd, entrylk_type type) +{ +	int32_t op_ret   = -1; +	int32_t op_errno = 0; + +	transport_t * transport = NULL; +	pid_t pid = -1; + +	pl_inode_t *       pinode = NULL; +	int                ret    = -1; +	pl_entry_lock_t   *unlocked = NULL; +	char               unwind = 1; + +	pl_dom_list_t	  *dom = NULL; + +	pinode = pl_inode_get (this, inode); +	if (!pinode) { +		gf_log (this->name, GF_LOG_ERROR, +			"Out of memory."); +		op_errno = ENOMEM; +		goto out; +	} + +	dom = get_domain (pinode, volume); +	if (!dom){ +		gf_log ("posix-locks", GF_LOG_ERROR, +			"Out of memory"); +		op_errno = ENOMEM; +		goto out; +	} + +	pid       = frame->root->pid; +	transport = frame->root->trans; + +	if (pid == 0) { +		/* +		   this is a special case that means release +		   all locks from this transport +		*/ + +		gf_log (this->name, GF_LOG_TRACE, +			"Releasing locks for transport %p", transport); + +		release_entry_locks_for_transport (this, pinode, dom, transport); +		op_ret = 0; + +		goto out; +	} + +	switch (cmd) { +	case ENTRYLK_LOCK: +		pthread_mutex_lock (&pinode->mutex); +		{ +			ret = __lock_name (pinode, basename, type, +					   frame, dom, this, 0); +		} +		pthread_mutex_unlock (&pinode->mutex); + +		if (ret < 0) { +			if (ret == -EAGAIN) +				unwind = 0; +			op_errno = -ret; +			goto out; +		} + +		break; + +	case ENTRYLK_LOCK_NB: +		pthread_mutex_lock (&pinode->mutex); +		{ +			ret = __lock_name (pinode, basename, type, +					   frame, dom, this, 1); +		} +		pthread_mutex_unlock (&pinode->mutex); + +		if (ret < 0) { +			op_errno = -ret; +			goto out; +		} + +		break; + +	case ENTRYLK_UNLOCK: +		pthread_mutex_lock (&pinode->mutex); +		{ +			 unlocked = __unlock_name (dom, basename, type); +		} +		pthread_mutex_unlock (&pinode->mutex); + +		if (unlocked) +			grant_blocked_entry_locks (this, pinode, unlocked, dom); + +		break; + +	default: +		gf_log (this->name, GF_LOG_ERROR, +			"Unexpected case in entrylk (cmd=%d). Please file" +                        "a bug report at http://bugs.gluster.com", cmd); +		goto out; +	} + +	op_ret = 0; +out: +	if (unwind) { +		STACK_UNWIND (frame, op_ret, op_errno); +	} + +	return 0; +} + +/** + * pl_entrylk: + * + * Locking on names (directory entries) + */ + +int +pl_entrylk (call_frame_t *frame, xlator_t *this, +	    const char *volume, loc_t *loc, const char *basename, +	    entrylk_cmd cmd, entrylk_type type) +{ + +  pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, type); + +  return 0; +} + + +/** + * pl_fentrylk: + * + * Locking on names (directory entries) + */ + +int +pl_fentrylk (call_frame_t *frame, xlator_t *this, +	     const char *volume, fd_t *fd, const char *basename, +	     entrylk_cmd cmd, entrylk_type type) +{ + +  pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, type); + +	return 0; +} diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 5a834657d..e427637bc 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -61,30 +61,40 @@ struct __pl_rw_req_t {  };  typedef struct __pl_rw_req_t pl_rw_req_t; +struct __pl_dom_list_t { +        const char *domain; +        struct list_head inode_list;       /* list_head back to pl_inode_t */ +        struct list_head entrylk_list;     /* List of entry locks */ +        struct list_head blocked_entrylks; /* List of all blocked entrylks */ +        struct list_head inodelk_list;     /* List of inode locks */ +}; +typedef struct __pl_dom_list_t pl_dom_list_t;  struct __entry_lock { -	struct list_head  inode_list;    /* list_head back to pl_inode_t */ -	struct list_head  blocked_locks; /* locks blocked due to this lock */ +	struct list_head  domain_list;    /* list_head back to pl_dom_list_t */ +	struct list_head  blocked_locks; /* list_head back to blocked_entrylks */  	call_frame_t     *frame;  	xlator_t         *this; -	int               blocked; -	 + +        const char       *volume; +  	const char       *basename;  	entrylk_type      type; -	unsigned int      read_count;    /* number of read locks */ +  	transport_t      *trans; +	pid_t             client_pid;    /* pid of client process */  };  typedef struct __entry_lock pl_entry_lock_t; -/* The "simulated" inode. This contains a list of all the locks associated  +/* The "simulated" inode. This contains a list of all the locks associated     with this file */  struct __pl_inode {  	pthread_mutex_t  mutex; -	struct list_head dir_list;       /* list of entry locks */ +	struct list_head dom_list;       /* list of domains */  	struct list_head ext_list;       /* list of fcntl locks */  	struct list_head int_list;       /* list of internal locks */  	struct list_head rw_list;        /* list of waiting r/w requests */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 5514d4414..cbad7844e 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -713,15 +713,18 @@ pl_forget (xlator_t *this,          pl_entry_lock_t *entry_tmp = NULL;          pl_entry_lock_t *entry_l   = NULL; +        pl_dom_list_t *dom = NULL; +        pl_dom_list_t *dom_tmp = NULL; +  	pl_inode = pl_inode_get (this, inode);  	if (!list_empty (&pl_inode->rw_list)) {  		gf_log (this->name, GF_LOG_DEBUG,  			"Pending R/W requests found, releasing."); -                 -                list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,  + +                list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,                                            list) { -                         +                          list_del (&rw_req->list);                          FREE (rw_req);                  } @@ -731,9 +734,9 @@ pl_forget (xlator_t *this,  		gf_log (this->name, GF_LOG_DEBUG,  			"Pending fcntl locks found, releasing."); -                list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,  +                list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,                                            list) { -                         +                          __delete_lock (pl_inode, ext_l);                          __destroy_lock (ext_l);                  } @@ -743,25 +746,32 @@ pl_forget (xlator_t *this,  		gf_log (this->name, GF_LOG_DEBUG,  			"Pending inode locks found, releasing."); -                list_for_each_entry_safe (int_l, int_tmp, &pl_inode->int_list,  +                list_for_each_entry_safe (int_l, int_tmp, &pl_inode->int_list,                                            list) { -                         +                          __delete_lock (pl_inode, int_l);                          __destroy_lock (int_l);                  }  	} -	if (!list_empty (&pl_inode->dir_list)) { -		gf_log (this->name, GF_LOG_DEBUG, -			"Pending entry locks found, releasing."); -                 -                list_for_each_entry_safe (entry_l, entry_tmp,  -                                          &pl_inode->dir_list, inode_list) { -                         -                        list_del (&entry_l->inode_list); -                        FREE (entry_l); +        list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) { +                if (!list_empty (&dom->entrylk_list)) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "Pending entry locks found, releasing."); + +                        list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) { +                                list_del_init (&entry_l->domain_list); +                                grant_blocked_entry_locks (this, pl_inode, entry_l, dom); +                                if (entry_l->basename) +                                        FREE (entry_l->basename); +                                FREE (entry_l); +                        } +                  } -	} +                list_del (&dom->inode_list); +                FREE (dom->domain); +                FREE (dom); +        }          FREE (pl_inode);  | 
