diff options
Diffstat (limited to 'xlators/features/locks/src/entrylk.c')
| -rw-r--r-- | xlators/features/locks/src/entrylk.c | 1499 |
1 files changed, 925 insertions, 574 deletions
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index b83044b7f28..fd772c850dd 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -1,65 +1,83 @@ /* - Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/list.h> +#include <glusterfs/upcall-utils.h> #include "locks.h" +#include "clear.h" #include "common.h" +#include "pl-messages.h" -static pl_entry_lock_t * -new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, - transport_t *trans, pid_t client_pid, uint64_t owner, const char *volume) - +void +__pl_entrylk_unref(pl_entry_lock_t *lock) { - pl_entry_lock_t *newlock = NULL; - - newlock = CALLOC (1, sizeof (pl_entry_lock_t)); - if (!newlock) { - goto out; - } - - newlock->basename = basename ? strdup (basename) : NULL; - newlock->type = type; - newlock->trans = trans; - newlock->volume = volume; - newlock->client_pid = client_pid; - newlock->owner = owner; + lock->ref--; + if (!lock->ref) { + GF_FREE((char *)lock->basename); + GF_FREE(lock->connection_id); + GF_FREE(lock); + } +} - INIT_LIST_HEAD (&newlock->domain_list); - INIT_LIST_HEAD (&newlock->blocked_locks); +static void +__pl_entrylk_ref(pl_entry_lock_t *lock) +{ + lock->ref++; +} +static pl_entry_lock_t * +new_entrylk_lock(pl_inode_t *pinode, const char *basename, entrylk_type type, + const char *domain, call_frame_t *frame, char *conn_id, + int32_t *op_errno) +{ + pl_entry_lock_t *newlock = NULL; + + if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) { + *op_errno = EINVAL; + goto out; + } + + newlock = GF_CALLOC(1, sizeof(pl_entry_lock_t), + gf_locks_mt_pl_entry_lock_t); + if (!newlock) { + *op_errno = ENOMEM; + goto out; + } + + newlock->basename = basename ? gf_strdup(basename) : NULL; + newlock->type = type; + newlock->client = frame->root->client; + newlock->client_pid = frame->root->pid; + newlock->volume = domain; + newlock->owner = frame->root->lk_owner; + newlock->frame = frame; + newlock->this = frame->this; + + if (conn_id) { + newlock->connection_id = gf_strdup(conn_id); + } + + INIT_LIST_HEAD(&newlock->domain_list); + INIT_LIST_HEAD(&newlock->blocked_locks); + INIT_LIST_HEAD(&newlock->client_list); + + __pl_entrylk_ref(newlock); out: - return newlock; + return newlock; } - /** * all_names - does a basename represent all names? * @basename: name to check @@ -74,206 +92,411 @@ out: */ static int -names_conflict (const char *n1, const char *n2) +names_conflict(const char *n1, const char *n2) { - return all_names (n1) || all_names (n2) || !strcmp (n1, n2); + return all_names(n1) || all_names(n2) || !strcmp(n1, n2); } +static int +__same_entrylk_owner(pl_entry_lock_t *l1, pl_entry_lock_t *l2) +{ + return (is_same_lkowner(&l1->owner, &l2->owner) && + (l1->client == l2->client)); +} +/* Just as in inodelk, allow conflicting name locks from same (lk_owner, conn)*/ static int -__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2) +__conflicting_entrylks(pl_entry_lock_t *l1, pl_entry_lock_t *l2) +{ + if (names_conflict(l1->basename, l2->basename) && + !__same_entrylk_owner(l1, l2)) + return 1; + + return 0; +} + +/* See comments in inodelk.c for details */ +static inline gf_boolean_t +__stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, + pl_entry_lock_t *requested_lock, time_t *lock_age_sec) { + posix_locks_private_t *priv = NULL; + + priv = this->private; + + /* Question: Should we just prune them all given the + * chance? Or just the locks we are attempting to acquire? + */ + if (names_conflict(candidate_lock->basename, requested_lock->basename)) { + *lock_age_sec = gf_time() - candidate_lock->granted_time; + if (*lock_age_sec > priv->revocation_secs) + return _gf_true; + } + return _gf_false; +} - return ((l1->owner == l2->owner) && - (l1->trans == l2->trans)); +/* See comments in inodelk.c for details */ +static gf_boolean_t +__entrylk_prune_stale(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + pl_entry_lock_t *lock) +{ + posix_locks_private_t *priv = NULL; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lk = NULL; + gf_boolean_t revoke_lock = _gf_false; + int bcount = 0; + int gcount = 0; + int op_errno = 0; + clrlk_args args; + args.opts = NULL; + time_t lk_age_sec = 0; + uint32_t max_blocked = 0; + char *reason_str = NULL; + + priv = this->private; + args.type = CLRLK_ENTRY; + if (priv->revocation_clear_all == _gf_true) + args.kind = CLRLK_ALL; + else + args.kind = CLRLK_GRANTED; + + if (list_empty(&dom->entrylk_list)) + goto out; + + pthread_mutex_lock(&pinode->mutex); + lock->pinode = pinode; + list_for_each_entry_safe(lk, tmp, &dom->entrylk_list, domain_list) + { + if (__stale_entrylk(this, lk, lock, &lk_age_sec) == _gf_true) { + revoke_lock = _gf_true; + reason_str = "age"; + break; + } + } + max_blocked = priv->revocation_max_blocked; + if (max_blocked != 0 && revoke_lock == _gf_false) { + list_for_each_entry_safe(lk, tmp, &dom->blocked_entrylks, blocked_locks) + { + max_blocked--; + if (max_blocked == 0) { + revoke_lock = _gf_true; + reason_str = "max blocked"; + break; + } + } + } + pthread_mutex_unlock(&pinode->mutex); + +out: + if (revoke_lock == _gf_true) { + clrlk_clear_entrylk(this, pinode, dom, &args, &bcount, &gcount, + &op_errno); + gf_log(this->name, GF_LOG_WARNING, + "Lock revocation [reason: %s; gfid: %s; domain: %s; " + "age: %ld sec] - Entry lock revoked: %d granted & %d " + "blocked locks cleared", + reason_str, uuid_utoa(pinode->gfid), dom->domain, lk_age_sec, + gcount, bcount); + } + + return revoke_lock; } +void +entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock, + struct timespec *now, struct list_head *contend) +{ + posix_locks_private_t *priv; + int64_t elapsed; + + priv = this->private; + + /* If this lock is in a list, it means that we are about to send a + * notification for it, so no need to do anything else. */ + if (!list_empty(&lock->contend)) { + return; + } + + elapsed = now->tv_sec; + elapsed -= lock->contention_time.tv_sec; + if (now->tv_nsec < lock->contention_time.tv_nsec) { + elapsed--; + } + if (elapsed < priv->notify_contention_delay) { + return; + } + + /* All contention notifications will be sent outside of the locked + * region. This means that currently granted locks might have already + * been unlocked by that time. To avoid the lock or the inode to be + * destroyed before we process them, we take an additional reference + * on both. */ + inode_ref(lock->pinode->inode); + __pl_entrylk_ref(lock); + + lock->contention_time = *now; + + list_add_tail(&lock->contend, contend); +} + +void +entrylk_contention_notify(xlator_t *this, struct list_head *contend) +{ + struct gf_upcall up; + struct gf_upcall_entrylk_contention lc; + pl_entry_lock_t *lock; + pl_inode_t *pl_inode; + client_t *client; + gf_boolean_t notify; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_entry_lock_t, contend); + + pl_inode = lock->pinode; + + pthread_mutex_lock(&pl_inode->mutex); + + /* If the lock has already been released, no notification is + * sent. We clear the notification time in this case. */ + notify = !list_empty(&lock->domain_list); + if (!notify) { + lock->contention_time.tv_sec = 0; + lock->contention_time.tv_nsec = 0; + } else { + lc.type = lock->type; + lc.name = lock->basename; + lc.pid = lock->client_pid; + lc.domain = lock->volume; + lc.xdata = NULL; + + gf_uuid_copy(up.gfid, lock->pinode->gfid); + client = (client_t *)lock->client; + if (client == NULL) { + /* A NULL client can be found if the entrylk + * was issued by a server side xlator. */ + up.client_uid = NULL; + } else { + up.client_uid = client->client_uid; + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + if (notify) { + up.event_type = GF_UPCALL_ENTRYLK_CONTENTION; + up.data = &lc; + + if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) { + gf_msg_debug(this->name, 0, + "Entrylk contention notification " + "failed"); + } else { + gf_msg_debug(this->name, 0, + "Entrylk contention notification " + "sent"); + } + } + + pthread_mutex_lock(&pl_inode->mutex); + + list_del_init(&lock->contend); + __pl_entrylk_unref(lock); + + pthread_mutex_unlock(&pl_inode->mutex); + + inode_unref(pl_inode->inode); + } +} /** - * lock_grantable - is this lock grantable? + * entrylk_grantable - is this lock grantable? * @inode: inode in which to look * @basename: name we're trying to lock * @type: type of lock */ static pl_entry_lock_t * -__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type) +__entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock, + struct timespec *now, struct list_head *contend) { - pl_entry_lock_t *lock = NULL; - - if (list_empty (&dom->entrylk_list)) - return NULL; - - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (names_conflict (lock->basename, basename)) - return lock; - } + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *ret = NULL; + + list_for_each_entry(tmp, &dom->entrylk_list, domain_list) + { + if (__conflicting_entrylks(tmp, lock)) { + if (ret == NULL) { + ret = tmp; + if (contend == NULL) { + break; + } + } + entrylk_contention_notify_check(this, tmp, now, contend); + } + } - return NULL; + return ret; } static pl_entry_lock_t * -__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type) +__blocked_entrylk_conflict(pl_dom_list_t *dom, pl_entry_lock_t *lock) { - pl_entry_lock_t *lock = NULL; - - if (list_empty (&dom->blocked_entrylks)) - return NULL; + pl_entry_lock_t *tmp = NULL; - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - if (names_conflict (lock->basename, basename)) - return lock; - } + list_for_each_entry(tmp, &dom->blocked_entrylks, blocked_locks) + { + if (names_conflict(tmp->basename, lock->basename)) + return lock; + } - return NULL; + return NULL; } static int -__owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock) +__owner_has_lock(pl_dom_list_t *dom, pl_entry_lock_t *newlock) { - pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *lock = NULL; - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (__same_entrylk_owner (lock, newlock)) - return 1; - } + list_for_each_entry(lock, &dom->entrylk_list, domain_list) + { + if (__same_entrylk_owner(lock, newlock)) + return 1; + } - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - if (__same_entrylk_owner (lock, newlock)) - return 1; - } + list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) + { + if (__same_entrylk_owner(lock, newlock)) + return 1; + } - return 0; + return 0; } static int -names_equal (const char *n1, const char *n2) +names_equal(const char *n1, const char *n2) { - return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); + return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp(n1, n2)); } void -pl_print_entrylk (char *str, int size, entrylk_cmd cmd, entrylk_type type, - const char *basename, const char *domain) +pl_print_entrylk(char *str, int size, entrylk_cmd cmd, entrylk_type type, + const char *basename, const char *domain) { - char *cmd_str = NULL; - char *type_str = NULL; + char *cmd_str = NULL; + char *type_str = NULL; - switch (cmd) { + switch (cmd) { case ENTRYLK_LOCK: - cmd_str = "LOCK"; - break; + cmd_str = "LOCK"; + break; case ENTRYLK_LOCK_NB: - cmd_str = "LOCK_NB"; - break; + cmd_str = "LOCK_NB"; + break; case ENTRYLK_UNLOCK: - cmd_str = "UNLOCK"; - break; + cmd_str = "UNLOCK"; + break; default: - cmd_str = "UNKNOWN"; - break; - } + cmd_str = "UNKNOWN"; + break; + } - switch (type) { + switch (type) { case ENTRYLK_RDLCK: - type_str = "READ"; - break; + type_str = "READ"; + break; case ENTRYLK_WRLCK: - type_str = "WRITE"; - break; + type_str = "WRITE"; + break; default: - type_str = "UNKNOWN"; - break; - } + type_str = "UNKNOWN"; + break; + } - snprintf (str, size, "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s", - cmd_str, type_str, basename, domain); + snprintf(str, size, + "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s", cmd_str, + type_str, basename, domain); } - void -entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *domain, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) +entrylk_trace_in(xlator_t *this, call_frame_t *frame, const char *domain, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_entrylk[256]; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_entrylk[256]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, domain); - gf_log (this->name, GF_LOG_NORMAL, - "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", - pl_locker, pl_lockee, pl_entrylk); + gf_log(this->name, GF_LOG_INFO, + "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, + pl_lockee, pl_entrylk); } - void -entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *domain, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, int op_ret, int op_errno) +entrylk_trace_out(xlator_t *this, call_frame_t *frame, const char *domain, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, int op_ret, int op_errno) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_entrylk[256]; - char verdict[32]; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_entrylk[256]; + char verdict[32]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain); - pl_print_verdict (verdict, 32, op_ret, op_errno); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, domain); + pl_print_verdict(verdict, 32, op_ret, op_errno); - gf_log (this->name, GF_LOG_NORMAL, - "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", - verdict, pl_locker, pl_lockee, pl_entrylk); + gf_log(this->name, GF_LOG_INFO, + "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", verdict, pl_locker, + pl_lockee, pl_entrylk); } - void -entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) +entrylk_trace_block(xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_entrylk[256]; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_entrylk[256]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, volume); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, volume); - gf_log (this->name, GF_LOG_NORMAL, - "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", - pl_locker, pl_lockee, pl_entrylk); + gf_log(this->name, GF_LOG_INFO, + "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, + pl_lockee, pl_entrylk); } /** - * __find_most_matching_lock - find the lock struct which most matches in order of: - * lock on the exact basename || - * an all_names lock + * __find_most_matching_lock - find the lock struct which most matches in order + * of: lock on the exact basename || an all_names lock * * * @inode: inode in which to look @@ -281,27 +504,61 @@ entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, */ static pl_entry_lock_t * -__find_most_matching_lock (pl_dom_list_t *dom, const char *basename) +__find_most_matching_lock(pl_dom_list_t *dom, const char *basename) +{ + pl_entry_lock_t *lock; + pl_entry_lock_t *all = NULL; + pl_entry_lock_t *exact = NULL; + + if (list_empty(&dom->entrylk_list)) + return NULL; + + list_for_each_entry(lock, &dom->entrylk_list, domain_list) + { + if (all_names(lock->basename)) + all = lock; + else if (names_equal(lock->basename, basename)) + exact = lock; + } + + return (exact ? exact : all); +} + +static pl_entry_lock_t * +__find_matching_lock(pl_dom_list_t *dom, pl_entry_lock_t *lock) +{ + pl_entry_lock_t *tmp = NULL; + + list_for_each_entry(tmp, &dom->entrylk_list, domain_list) + { + if (names_equal(lock->basename, tmp->basename) && + __same_entrylk_owner(lock, tmp) && (lock->type == tmp->type)) + return tmp; + } + return NULL; +} + +static int +__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + pl_entry_lock_t *lock, int nonblock) { - pl_entry_lock_t *lock; - pl_entry_lock_t *all = NULL; - pl_entry_lock_t *exact = NULL; + if (nonblock) + goto out; - if (list_empty (&dom->entrylk_list)) - return NULL; + lock->blkd_time = gf_time(); + list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks); - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (all_names (lock->basename)) - all = lock; - else if (names_equal (lock->basename, basename)) - exact = lock; - } + gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", + pinode, lock->basename); - return (exact ? exact : all); + entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename, + ENTRYLK_LOCK, lock->type); +out: + return -EAGAIN; } /** - * __lock_name - lock a name in a directory + * __lock_entrylk - lock a name in a directory * @inode: inode for the directory in which to lock * @basename: name of the entry to lock * if null, lock the entire directory @@ -312,397 +569,375 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename) */ int -__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, - call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, int nonblock) +__lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, + int nonblock, pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) { - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *conf = NULL; - transport_t *trans = NULL; - pid_t client_pid = 0; - uint64_t owner = 0; - - int ret = -EINVAL; - - trans = frame->root->trans; - client_pid = frame->root->pid; - owner = (uint64_t)(long)frame->root; - - lock = new_entrylk_lock (pinode, basename, type, trans, client_pid, owner, dom->domain); - if (!lock) { - ret = -ENOMEM; - goto out; - } - - conf = __lock_grantable (dom, basename, type); - if (conf) { - ret = -EAGAIN; - if (nonblock) - goto out; - - lock->frame = frame; - lock->this = this; - - list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); - - gf_log (this->name, GF_LOG_TRACE, - "Blocking lock: {pinode=%p, basename=%s}", - pinode, basename); - - goto out; - } - - if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) { - ret = -EAGAIN; - if (nonblock) - goto out; - lock->frame = frame; - lock->this = this; - - list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); - - gf_log (this->name, GF_LOG_TRACE, - "Lock is grantable, but blocking to prevent starvation"); - gf_log (this->name, GF_LOG_TRACE, - "Blocking lock: {pinode=%p, basename=%s}", - pinode, basename); - - goto out; + pl_entry_lock_t *conf = NULL; + int ret = -EAGAIN; + + conf = __entrylk_grantable(this, dom, lock, now, contend); + if (conf) { + ret = __lock_blocked_add(this, pinode, dom, lock, nonblock); + goto out; + } + + /* To prevent blocked locks starvation, check if there are any blocked + * locks thay may conflict with this lock. If there is then don't grant + * the lock. BUT grant the lock if the owner already has lock to allow + * nested locks. + * Example: SHD from Machine1 takes (gfid, basename=257-length-name) + * and is granted. + * SHD from machine2 takes (gfid, basename=NULL) and is blocked. + * When SHD from Machine1 takes (gfid, basename=NULL) it needs to be + * granted, without which self-heal can't progress. + * TODO: Find why 'owner_has_lock' is checked even for blocked locks. + */ + if (__blocked_entrylk_conflict(dom, lock) && + !(__owner_has_lock(dom, lock))) { + if (nonblock == 0) { + gf_log(this->name, GF_LOG_DEBUG, + "Lock is grantable, but blocking to prevent " + "starvation"); } - switch (type) { - case ENTRYLK_WRLCK: - list_add (&lock->domain_list, &dom->entrylk_list); - break; + ret = __lock_blocked_add(this, pinode, dom, lock, nonblock); + goto out; + } - default: - - gf_log (this->name, GF_LOG_DEBUG, - "Invalid type for entrylk specified: %d", type); - ret = -EINVAL; - goto out; - } + __pl_entrylk_ref(lock); + lock->granted_time = gf_time(); + list_add(&lock->domain_list, &dom->entrylk_list); - ret = 0; + ret = 0; out: - return ret; + return ret; } /** - * __unlock_name - unlock a name in a directory + * __unlock_entrylk - unlock a name in a directory * @inode: inode for the directory to unlock in * @basename: name of the entry to unlock * if null, unlock the entire directory */ pl_entry_lock_t * -__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type) +__unlock_entrylk(pl_dom_list_t *dom, pl_entry_lock_t *lock) { - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *ret_lock = NULL; + pl_entry_lock_t *ret_lock = NULL; - lock = __find_most_matching_lock (dom, basename); + ret_lock = __find_matching_lock(dom, lock); - if (!lock) { - gf_log ("locks", GF_LOG_DEBUG, - "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found", - basename); - goto out; - } + if (ret_lock) { + list_del_init(&ret_lock->domain_list); + } else { + gf_log("locks", GF_LOG_ERROR, + "unlock on %s " + "(type=ENTRYLK_WRLCK) attempted but no matching lock " + "found", + lock->basename); + } - if (names_equal (lock->basename, basename) - && lock->type == type) { + return ret_lock; +} - if (type == ENTRYLK_WRLCK) { - list_del (&lock->domain_list); - ret_lock = lock; - } - } else { - gf_log ("locks", GF_LOG_DEBUG, - "Unlock for a non-existing lock!"); - goto out; - } +int32_t +check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename) +{ + int32_t entrylk = 0; + pl_dom_list_t *dom = NULL; + pl_entry_lock_t *conf = NULL; + + pl_inode_t *pinode = pl_inode_get(this, parent, NULL); + if (!pinode) + goto out; + pthread_mutex_lock(&pinode->mutex); + { + list_for_each_entry(dom, &pinode->dom_list, inode_list) + { + conf = __find_most_matching_lock(dom, basename); + if (conf && conf->basename) { + entrylk = 1; + break; + } + } + } + pthread_mutex_unlock(&pinode->mutex); out: - return ret_lock; + return entrylk; } - void -__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom, struct list_head *granted) +__grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct list_head *granted, + struct timespec *now, struct list_head *contend) { - int bl_ret = 0; - pl_entry_lock_t *bl = NULL; - pl_entry_lock_t *tmp = NULL; - - struct list_head blocked_list; - - INIT_LIST_HEAD (&blocked_list); - list_splice_init (&dom->blocked_entrylks, &blocked_list); - - list_for_each_entry_safe (bl, tmp, &blocked_list, - blocked_locks) { + int bl_ret = 0; + pl_entry_lock_t *bl = NULL; + pl_entry_lock_t *tmp = NULL; - list_del_init (&bl->blocked_locks); + struct list_head blocked_list; + INIT_LIST_HEAD(&blocked_list); + list_splice_init(&dom->blocked_entrylks, &blocked_list); - gf_log ("locks", GF_LOG_TRACE, - "Trying to unblock: {pinode=%p, basename=%s}", - pl_inode, bl->basename); + list_for_each_entry_safe(bl, tmp, &blocked_list, blocked_locks) + { + list_del_init(&bl->blocked_locks); - bl_ret = __lock_name (pl_inode, bl->basename, bl->type, - bl->frame, dom, bl->this, 0); + bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend); - if (bl_ret == 0) { - list_add (&bl->blocked_locks, granted); - } else { - if (bl->basename) - FREE (bl->basename); - FREE (bl); - } - } - return; + if (bl_ret == 0) { + list_add_tail(&bl->blocked_locks, granted); + } + } } /* Grants locks if possible which are blocked on a lock */ void -grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_entry_lock_t *unlocked, pl_dom_list_t *dom) +grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) { - struct list_head granted_list; - pl_entry_lock_t *tmp = NULL; - pl_entry_lock_t *lock = NULL; - - INIT_LIST_HEAD (&granted_list); - - pthread_mutex_lock (&pl_inode->mutex); - { - __grant_blocked_entry_locks (this, pl_inode, dom, &granted_list); - } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { - list_del_init (&lock->blocked_locks); - - entrylk_trace_out (this, lock->frame, NULL, NULL, NULL, - lock->basename, ENTRYLK_LOCK, lock->type, - 0, 0); - - STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0); - - FREE (lock->basename); - FREE (lock); - } - - FREE (unlocked->basename); - FREE (unlocked); - - return; + struct list_head granted_list; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lock = NULL; + + INIT_LIST_HEAD(&granted_list); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_entry_locks(this, pl_inode, dom, &granted_list, now, + contend); + } + pthread_mutex_unlock(&pl_inode->mutex); + + list_for_each_entry_safe(lock, tmp, &granted_list, blocked_locks) + { + entrylk_trace_out(this, lock->frame, NULL, NULL, NULL, lock->basename, + ENTRYLK_LOCK, lock->type, 0, 0); + + STACK_UNWIND_STRICT(entrylk, lock->frame, 0, 0, NULL); + lock->frame = NULL; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(lock, tmp, &granted_list, blocked_locks) + { + list_del_init(&lock->blocked_locks); + __pl_entrylk_unref(lock); + } + } + pthread_mutex_unlock(&pl_inode->mutex); } -/** - * release_entry_locks_for_transport: release all entry locks from this - * transport for this loc_t - */ +/* Common entrylk code called by pl_entrylk and pl_fentrylk */ +int +pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + inode_t *inode, const char *basename, entrylk_cmd cmd, + entrylk_type type, loc_t *loc, fd_t *fd, dict_t *xdata) -static int -release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode, - pl_dom_list_t *dom, transport_t *trans) { - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *tmp = NULL; - struct list_head granted; - struct list_head released; - - INIT_LIST_HEAD (&granted); - INIT_LIST_HEAD (&released); - - pthread_mutex_lock (&pinode->mutex); - { - list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks, - blocked_locks) { - if (lock->trans != trans) - continue; - - list_del_init (&lock->blocked_locks); - - gf_log (this->name, GF_LOG_TRACE, - "releasing lock on held by " - "{transport=%p}",trans); - - list_add (&lock->blocked_locks, &released); - + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + char unwind = 1; + GF_UNUSED int dict_ret = -1; + pl_inode_t *pinode = NULL; + pl_entry_lock_t *reqlock = NULL; + pl_entry_lock_t *unlocked = NULL; + pl_dom_list_t *dom = NULL; + char *conn_id = NULL; + pl_ctx_t *ctx = NULL; + int nonblock = 0; + gf_boolean_t need_inode_unref = _gf_false; + posix_locks_private_t *priv = NULL; + struct list_head *pcontend = NULL; + struct list_head contend; + struct timespec now = {}; + + priv = this->private; + + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + if (xdata) + dict_ret = dict_get_str(xdata, "connection-id", &conn_id); + + pinode = pl_inode_get(this, inode, NULL); + if (!pinode) { + op_errno = ENOMEM; + goto out; + } + + if (frame->root->client) { + ctx = pl_ctx_get(frame->root->client, this); + if (!ctx) { + op_errno = ENOMEM; + gf_log(this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; + } + } + + dom = get_domain(pinode, volume); + if (!dom) { + op_errno = ENOMEM; + goto out; + } + + entrylk_trace_in(this, frame, volume, fd, loc, basename, cmd, type); + + reqlock = new_entrylk_lock(pinode, basename, type, dom->domain, frame, + conn_id, &op_errno); + if (!reqlock) { + op_ret = -1; + goto unwind; + } + + /* Ideally, AFTER a successful lock (both blocking and non-blocking) or + * an unsuccessful blocking lock operation, the inode needs to be ref'd. + * + * But doing so might give room to a race where the lock-requesting + * client could send a DISCONNECT just before this thread refs the inode + * after the locking is done, and the epoll thread could unref the inode + * in cleanup which means the inode's refcount would come down to 0, and + * the call to pl_forget() at this point destroys @pinode. Now when + * the io-thread executing this function tries to access pinode, + * it could crash on account of illegal memory access. + * + * To get around this problem, the inode is ref'd once even before + * adding the lock into client_list as a precautionary measure. + * This way even if there are DISCONNECTs, there will always be 1 extra + * ref on the inode, so @pinode is still alive until after the + * current stack unwinds. + */ + pinode->inode = inode_ref(inode); + if (priv->revocation_secs != 0) { + if (cmd != ENTRYLK_UNLOCK) { + __entrylk_prune_stale(this, pinode, dom, reqlock); + } else if (priv->monkey_unlocking == _gf_true) { + if (pl_does_monkey_want_stuck_lock()) { + gf_log(this->name, GF_LOG_WARNING, + "MONKEY LOCKING (forcing stuck lock)!"); + op_ret = 0; + need_inode_unref = _gf_true; + pthread_mutex_lock(&pinode->mutex); + { + __pl_entrylk_unref(reqlock); } + pthread_mutex_unlock(&pinode->mutex); + goto out; + } + } + } - list_for_each_entry_safe (lock, tmp, &dom->entrylk_list, - domain_list) { - if (lock->trans != trans) - continue; - - list_del_init (&lock->domain_list); - - gf_log (this->name, GF_LOG_TRACE, - "releasing lock on held by " - "{transport=%p}",trans); - - FREE (lock->basename); - FREE (lock); - } - - __grant_blocked_entry_locks (this, pinode, dom, &granted); - - } + switch (cmd) { + case ENTRYLK_LOCK_NB: + nonblock = 1; + /* fall through */ + case ENTRYLK_LOCK: + if (ctx) + pthread_mutex_lock(&ctx->lock); + pthread_mutex_lock(&pinode->mutex); + { + reqlock->pinode = pinode; + + ret = __lock_entrylk(this, pinode, reqlock, nonblock, dom, &now, + pcontend); + if (ret == 0) { + reqlock->frame = NULL; + op_ret = 0; + } else { + op_errno = -ret; + } - pthread_mutex_unlock (&pinode->mutex); + if (ctx && (!ret || !nonblock)) + list_add(&reqlock->client_list, &ctx->entrylk_lockers); - list_for_each_entry_safe (lock, tmp, &released, blocked_locks) { - list_del_init (&lock->blocked_locks); + if (ret == -EAGAIN && !nonblock) { + /* blocked */ + unwind = 0; + } else { + __pl_entrylk_unref(reqlock); + } - STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN); + /* For all but the case where a non-blocking lock + * attempt fails, the extra ref taken before the switch + * block must be negated. + */ + if ((ret == -EAGAIN) && (nonblock)) + need_inode_unref = _gf_true; + } + pthread_mutex_unlock(&pinode->mutex); + if (ctx) + pthread_mutex_unlock(&ctx->lock); + break; - if (lock->basename) - FREE (lock->basename); - FREE (lock); + case ENTRYLK_UNLOCK: + if (ctx) + pthread_mutex_lock(&ctx->lock); + pthread_mutex_lock(&pinode->mutex); + { + /* Irrespective of whether unlock succeeds or not, + * the extra inode ref that was done before the switch + * block must be negated. Towards this, + * @need_inode_unref flag is set unconditionally here. + */ + need_inode_unref = _gf_true; + unlocked = __unlock_entrylk(dom, reqlock); + if (unlocked) { + list_del_init(&unlocked->client_list); + __pl_entrylk_unref(unlocked); + op_ret = 0; + } else { + op_errno = EINVAL; + } + __pl_entrylk_unref(reqlock); + } + pthread_mutex_unlock(&pinode->mutex); + if (ctx) + pthread_mutex_unlock(&ctx->lock); - } + grant_blocked_entry_locks(this, pinode, dom, &now, pcontend); - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { - list_del_init (&lock->blocked_locks); + break; - STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0); + default: + need_inode_unref = _gf_true; + gf_log(this->name, GF_LOG_ERROR, + "Unexpected case in entrylk (cmd=%d). Please file" + "a bug report at http://bugs.gluster.com", + cmd); + goto out; + } + /* The following (extra) unref corresponds to the ref that + * was done at the time the lock was granted. + */ + if ((cmd == ENTRYLK_UNLOCK) && (op_ret == 0)) + inode_unref(pinode->inode); - if (lock->basename) - FREE (lock->basename); - FREE (lock); - } +out: - return 0; -} + if (need_inode_unref) + inode_unref(pinode->inode); -/* Common entrylk code called by pl_entrylk and pl_fentrylk */ -int -pl_common_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, inode_t *inode, const char *basename, - entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - - transport_t * transport = NULL; - pid_t pid = -1; - uint64_t owner = -1; - - pl_inode_t * pinode = NULL; - int ret = -1; - pl_entry_lock_t *unlocked = NULL; - char unwind = 1; - - pl_dom_list_t *dom = NULL; - - pinode = pl_inode_get (this, inode); - if (!pinode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_errno = ENOMEM; - goto out; - } - - dom = get_domain (pinode, volume); - if (!dom){ - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto out; - } - - entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type); - - pid = frame->root->pid; - owner = (uint64_t)(long) frame->root; - transport = frame->root->trans; - - if (pid == 0) { - /* - this is a special case that means release - all locks from this transport - */ - - gf_log (this->name, GF_LOG_TRACE, - "Releasing locks for transport %p", transport); - - release_entry_locks_for_transport (this, pinode, dom, transport); - op_ret = 0; - - goto out; - } - - switch (cmd) { - case ENTRYLK_LOCK: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, dom, this, 0); - } - pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - if (ret == -EAGAIN) - unwind = 0; - op_errno = -ret; - goto out; - } - - break; - - case ENTRYLK_LOCK_NB: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, dom, this, 1); - } - pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - op_errno = -ret; - goto out; - } - - break; - - case ENTRYLK_UNLOCK: - pthread_mutex_lock (&pinode->mutex); - { - unlocked = __unlock_name (dom, basename, type); - } - pthread_mutex_unlock (&pinode->mutex); - - if (unlocked) - grant_blocked_entry_locks (this, pinode, unlocked, dom); - - break; - - default: - gf_log (this->name, GF_LOG_ERROR, - "Unexpected case in entrylk (cmd=%d). Please file" - "a bug report at http://bugs.gluster.com", cmd); - goto out; - } - - op_ret = 0; -out: - pl_update_refkeeper (this, inode); - if (unwind) { - entrylk_trace_out (this, frame, volume, fd, loc, basename, - cmd, type, op_ret, op_errno); - - STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno); - } else { - entrylk_trace_block (this, frame, volume, fd, loc, basename, - cmd, type); - } + if (unwind) { + entrylk_trace_out(this, frame, volume, fd, loc, basename, cmd, type, + op_ret, op_errno); + unwind: + STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL); + } + if (pcontend != NULL) { + entrylk_contention_notify(this, pcontend); + } - return 0; + return 0; } /** @@ -712,17 +947,16 @@ out: */ int -pl_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) +pl_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { + pl_common_entrylk(frame, this, volume, loc->inode, basename, cmd, type, loc, + NULL, xdata); - pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, type, loc, NULL); - - return 0; + return 0; } - /** * pl_fentrylk: * @@ -730,73 +964,190 @@ pl_entrylk (call_frame_t *frame, xlator_t *this, */ int -pl_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) +pl_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { + pl_common_entrylk(frame, this, volume, fd->inode, basename, cmd, type, NULL, + fd, xdata); - pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, type, NULL, fd); - - return 0; + return 0; } - -static int32_t -__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode) +static void +pl_entrylk_log_cleanup(pl_entry_lock_t *lock) { - int32_t count = 0; - pl_entry_lock_t *lock = NULL; - pl_dom_list_t *dom = NULL; + pl_inode_t *pinode = NULL; + + pinode = lock->pinode; - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { + gf_log(THIS->name, GF_LOG_WARNING, + "releasing lock on %s held by " + "{client=%p, pid=%" PRId64 " lk-owner=%s}", + uuid_utoa(pinode->gfid), lock->client, (uint64_t)lock->client_pid, + lkowner_utoa(&lock->owner)); +} - gf_log (this->name, GF_LOG_DEBUG, - " XATTR DEBUG" - " domain: %s %s on %s state = Active", - dom->domain, - lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : - "ENTRYLK_WRLCK", lock->basename); - count++; +/* Release all entrylks from this client */ +int +pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) +{ + posix_locks_private_t *priv; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *l = NULL; + pl_dom_list_t *dom = NULL; + pl_inode_t *pinode = NULL; + struct list_head *pcontend = NULL; + struct list_head released; + struct list_head unwind; + struct list_head contend; + struct timespec now = {}; + + INIT_LIST_HEAD(&released); + INIT_LIST_HEAD(&unwind); + + priv = this->private; + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + pthread_mutex_lock(&ctx->lock); + { + list_for_each_entry_safe(l, tmp, &ctx->entrylk_lockers, client_list) + { + pl_entrylk_log_cleanup(l); + + pinode = l->pinode; + + pthread_mutex_lock(&pinode->mutex); + { + /* If the entrylk object is part of granted list but not + * blocked list, then perform the following actions: + * i. delete the object from granted list; + * ii. grant other locks (from other clients) that may + * have been blocked on this entrylk; and + * iii. unref the object. + * + * If the entrylk object (L1) is part of both granted + * and blocked lists, then this means that a parallel + * unlock on another entrylk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in + * __grant_blocked_entry_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked entrylks from other clients, now that L1 is + * out of their way and then unref L1 in the end, and + * leave it to the other thread (the one executing + * unlock codepath) to unwind L1's frame, delete it from + * blocked_locks list, and perform the last unref on L1. + * + * If the entrylk object (L1) is part of blocked list + * only, the cleanup code path must: + * i. delete it from the blocked_locks list inside + * this critical section, + * ii. unwind its frame with EAGAIN, + * iii. try and grant blocked entry locks from other + * clients that were otherwise grantable, but were + * blocked to avoid leaving L1 to starve forever. + * iv. unref the object. + */ + list_del_init(&l->client_list); + + if (!list_empty(&l->domain_list)) { + list_del_init(&l->domain_list); + list_add_tail(&l->client_list, &released); + } else { + list_del_init(&l->blocked_locks); + list_add_tail(&l->client_list, &unwind); } + } + pthread_mutex_unlock(&pinode->mutex); + } + } + pthread_mutex_unlock(&ctx->lock); - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { + if (!list_empty(&unwind)) { + list_for_each_entry_safe(l, tmp, &unwind, client_list) + { + list_del_init(&l->client_list); - gf_log (this->name, GF_LOG_DEBUG, - " XATTR DEBUG" - " domain: %s %s on %s state = Blocked", - dom->domain, - lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : - "ENTRYLK_WRLCK", lock->basename); - count++; - } + if (l->frame) + STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL); + list_add_tail(&l->client_list, &released); + } + } + + if (!list_empty(&released)) { + list_for_each_entry_safe(l, tmp, &released, client_list) + { + list_del_init(&l->client_list); + + pinode = l->pinode; + dom = get_domain(pinode, l->volume); + + grant_blocked_entry_locks(this, pinode, dom, &now, pcontend); + + pthread_mutex_lock(&pinode->mutex); + { + __pl_entrylk_unref(l); + } + pthread_mutex_unlock(&pinode->mutex); + + inode_unref(pinode->inode); } + } - return count; + if (pcontend != NULL) { + entrylk_contention_notify(this, pcontend); + } + + return 0; } int32_t -get_entrylk_count (xlator_t *this, inode_t *inode) +__get_entrylk_count(xlator_t *this, pl_inode_t *pl_inode) { - pl_inode_t *pl_inode = NULL; - uint64_t tmp_pl_inode = 0; - int ret = 0; - int32_t count = 0; + int32_t count = 0; + pl_entry_lock_t *lock = NULL; + pl_dom_list_t *dom = NULL; - ret = inode_ctx_get (inode, this, &tmp_pl_inode); - if (ret != 0) { - goto out; - } - - pl_inode = (pl_inode_t *)(long) tmp_pl_inode; + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(lock, &dom->entrylk_list, domain_list) { count++; } - pthread_mutex_lock (&pl_inode->mutex); + list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) { - count = __get_entrylk_count (this, pl_inode); + count++; } - pthread_mutex_unlock (&pl_inode->mutex); + } + + return count; +} + +int32_t +get_entrylk_count(xlator_t *this, inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = 0; + int32_t count = 0; + + ret = inode_ctx_get(inode, this, &tmp_pl_inode); + if (ret != 0) { + goto out; + } + + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + + pthread_mutex_lock(&pl_inode->mutex); + { + count = __get_entrylk_count(this, pl_inode); + } + pthread_mutex_unlock(&pl_inode->mutex); out: - return count; + return count; } |
