summaryrefslogtreecommitdiffstats
path: root/xlators/features
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2016-06-15 14:42:19 +0200
committerPranith Kumar Karampuri <pkarampu@redhat.com>2018-01-16 10:37:22 +0000
commit7ba7a4b27d124f4ee16fe4776a4670cd5b0160c4 (patch)
tree73cca7226576b7ce3e480dc991aa70b9cc7cab07 /xlators/features
parent3e9a9c029fac359477fb26d9cc7803749ba038b2 (diff)
locks: added inodelk/entrylk contention upcall notifications
The locks xlator now is able to send a contention notification to the current owner of the lock. This is only a notification that can be used to improve performance of some client side operations that might benefit from extended duration of lock ownership. Nothing is done if the lock owner decides to ignore the message and to not release the lock. For forced release of acquired resources, leases must be used. Change-Id: I7f1ad32a0b4b445505b09908a050080ad848f8e0 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Diffstat (limited to 'xlators/features')
-rw-r--r--xlators/features/locks/src/clear.c36
-rw-r--r--xlators/features/locks/src/common.h12
-rw-r--r--xlators/features/locks/src/entrylk.c290
-rw-r--r--xlators/features/locks/src/inodelk.c309
-rw-r--r--xlators/features/locks/src/locks.h8
-rw-r--r--xlators/features/locks/src/pl-messages.h4
-rw-r--r--xlators/features/locks/src/posix.c40
7 files changed, 540 insertions, 159 deletions
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index a76d6beacb1..1609fc416d2 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -200,6 +200,7 @@ int
clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
clrlk_args *args, int *blkd, int *granted, int *op_errno)
{
+ posix_locks_private_t *priv;
pl_inode_lock_t *ilock = NULL;
pl_inode_lock_t *tmp = NULL;
struct gf_flock ulock = {0, };
@@ -207,9 +208,20 @@ clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
int bcount = 0;
int gcount = 0;
gf_boolean_t chk_range = _gf_false;
+ struct list_head *pcontend = NULL;
struct list_head released;
+ struct list_head contend;
+ struct timespec now = { };
INIT_LIST_HEAD (&released);
+
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
*op_errno = EINVAL;
goto out;
@@ -283,7 +295,10 @@ granted:
ret = 0;
out:
- grant_blocked_inode_locks (this, pl_inode, dom);
+ grant_blocked_inode_locks (this, pl_inode, dom, &now, pcontend);
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
*blkd = bcount;
*granted = gcount;
return ret;
@@ -294,15 +309,27 @@ int
clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
clrlk_args *args, int *blkd, int *granted, int *op_errno)
{
+ posix_locks_private_t *priv;
pl_entry_lock_t *elock = NULL;
pl_entry_lock_t *tmp = NULL;
int bcount = 0;
int gcount = 0;
int ret = -1;
+ struct list_head *pcontend = NULL;
struct list_head removed;
struct list_head released;
+ struct list_head contend;
+ struct timespec now;
INIT_LIST_HEAD (&released);
+
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
if (args->kind & CLRLK_BLOCKED)
goto blkd;
@@ -361,12 +388,15 @@ granted:
list_del_init (&elock->domain_list);
list_add_tail (&elock->domain_list, &removed);
- __pl_entrylk_unref (elock);
+ __pl_entrylk_unref (elock);
}
}
pthread_mutex_unlock (&pl_inode->mutex);
- grant_blocked_entry_locks (this, pl_inode, dom);
+ grant_blocked_entry_locks (this, pl_inode, dom, &now, pcontend);
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
ret = 0;
out:
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index 3729ca24bed..50c156feb38 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -69,7 +69,11 @@ get_domain (pl_inode_t *pl_inode, const char *volume);
void
grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom);
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend);
+
+void
+inodelk_contention_notify (xlator_t *this, struct list_head *contend);
void
__delete_inode_lock (pl_inode_lock_t *lock);
@@ -79,7 +83,11 @@ __pl_inodelk_unref (pl_inode_lock_t *lock);
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom);
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend);
+
+void
+entrylk_contention_notify (xlator_t *this, struct list_head *contend);
void pl_update_refkeeper (xlator_t *this, inode_t *inode);
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 6698516fc83..008d05a34c4 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -13,17 +13,19 @@
#include "logging.h"
#include "common-utils.h"
#include "list.h"
+#include "upcall-utils.h"
#include "locks.h"
#include "clear.h"
#include "common.h"
+#include "pl-messages.h"
void
__pl_entrylk_unref (pl_entry_lock_t *lock)
{
lock->ref--;
if (!lock->ref) {
- GF_FREE ((char *)lock->basename);
+ GF_FREE ((char *)lock->basename);
GF_FREE (lock->connection_id);
GF_FREE (lock);
}
@@ -39,7 +41,7 @@ __pl_entrylk_ref (pl_entry_lock_t *lock)
static pl_entry_lock_t *
new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- const char *domain, call_frame_t *frame, char *conn_id)
+ const char *domain, call_frame_t *frame, char *conn_id)
{
pl_entry_lock_t *newlock = NULL;
@@ -55,7 +57,7 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
newlock->client_pid = frame->root->pid;
newlock->volume = domain;
newlock->owner = frame->root->lk_owner;
- newlock->frame = frame;
+ newlock->frame = frame;
newlock->this = frame->this;
if (conn_id) {
@@ -64,9 +66,9 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
INIT_LIST_HEAD (&newlock->domain_list);
INIT_LIST_HEAD (&newlock->blocked_locks);
- INIT_LIST_HEAD (&newlock->client_list);
+ INIT_LIST_HEAD (&newlock->client_list);
- __pl_entrylk_ref (newlock);
+ __pl_entrylk_ref (newlock);
out:
return newlock;
}
@@ -201,6 +203,113 @@ out:
return revoke_lock;
}
+static gf_boolean_t
+__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
+ struct timespec *now)
+{
+ posix_locks_private_t *priv;
+ int64_t elapsed;
+
+ priv = this->private;
+
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+ return _gf_false;
+ }
+
+ elapsed = now->tv_sec;
+ elapsed -= lock->contention_time.tv_sec;
+ if (now->tv_nsec < lock->contention_time.tv_nsec) {
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+ return _gf_false;
+ }
+
+ /* All contention notifications will be sent outside of the locked
+ * region. This means that currently granted locks might have already
+ * been unlocked by that time. To avoid the lock or the inode to be
+ * destroyed before we process them, we take an additional reference
+ * on both. */
+ inode_ref(lock->pinode->inode);
+ __pl_entrylk_ref(lock);
+
+ lock->contention_time = *now;
+
+ return _gf_true;
+}
+
+void
+entrylk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+ struct gf_upcall up;
+ struct gf_upcall_entrylk_contention lc;
+ pl_entry_lock_t *lock;
+ pl_inode_t *pl_inode;
+ client_t *client;
+ gf_boolean_t notify;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_entry_lock_t, contend);
+
+ pl_inode = lock->pinode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* If the lock has already been released, no notification is
+ * sent. We clear the notification time in this case. */
+ notify = !list_empty(&lock->domain_list);
+ if (!notify) {
+ lock->contention_time.tv_sec = 0;
+ lock->contention_time.tv_nsec = 0;
+ } else {
+ lc.type = lock->type;
+ lc.name = lock->basename;
+ lc.pid = lock->client_pid;
+ lc.domain = lock->volume;
+ lc.xdata = NULL;
+
+ gf_uuid_copy(up.gfid, lock->pinode->gfid);
+ client = (client_t *)lock->client;
+ if (client == NULL) {
+ /* A NULL client can be found if the entrylk
+ * was issued by a server side xlator. */
+ up.client_uid = NULL;
+ } else {
+ up.client_uid = client->client_uid;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (notify) {
+ up.event_type = GF_UPCALL_ENTRYLK_CONTENTION;
+ up.data = &lc;
+
+ if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+ gf_msg_debug(this->name, 0,
+ "Entrylk contention notification "
+ "failed");
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Entrylk contention notification "
+ "sent");
+ }
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ list_del_init(&lock->contend);
+ __pl_entrylk_unref(lock);
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ inode_unref(pl_inode->inode);
+ }
+}
+
+
/**
* entrylk_grantable - is this lock grantable?
* @inode: inode in which to look
@@ -208,19 +317,27 @@ out:
* @type: type of lock
*/
static pl_entry_lock_t *
-__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock)
+__entrylk_grantable (xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
pl_entry_lock_t *tmp = NULL;
-
- if (list_empty (&dom->entrylk_list))
- return NULL;
+ pl_entry_lock_t *ret = NULL;
list_for_each_entry (tmp, &dom->entrylk_list, domain_list) {
- if (__conflicting_entrylks (tmp, lock))
- return tmp;
+ if (__conflicting_entrylks (tmp, lock)) {
+ if (ret == NULL) {
+ ret = tmp;
+ if (contend == NULL) {
+ break;
+ }
+ }
+ if (__entrylk_needs_contention_notify(this, tmp, now)) {
+ list_add_tail(&tmp->contend, contend);
+ }
+ }
}
- return NULL;
+ return ret;
}
static pl_entry_lock_t *
@@ -228,9 +345,6 @@ __blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
pl_entry_lock_t *tmp = NULL;
- if (list_empty (&dom->blocked_entrylks))
- return NULL;
-
list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) {
if (names_conflict (tmp->basename, lock->basename))
return lock;
@@ -426,6 +540,27 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
return NULL;
}
+static int
+__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_entry_lock_t *lock, int nonblock)
+{
+ struct timeval now;
+
+ gettimeofday(&now, NULL);
+
+ if (nonblock)
+ goto out;
+
+ lock->blkd_time = now;
+ list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
+
+ gf_msg_trace (this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, lock->basename);
+
+out:
+ return -EAGAIN;
+}
+
/**
* __lock_entrylk - lock a name in a directory
* @inode: inode for the directory in which to lock
@@ -439,24 +574,15 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
int
__lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
- int nonblock, pl_dom_list_t *dom)
+ int nonblock, pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
pl_entry_lock_t *conf = NULL;
int ret = -EAGAIN;
- conf = __entrylk_grantable (dom, lock);
+ conf = __entrylk_grantable (this, dom, lock, now, contend);
if (conf) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, lock->basename);
-
+ ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
goto out;
}
@@ -471,20 +597,15 @@ __lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
* granted, without which self-heal can't progress.
* TODO: Find why 'owner_has_lock' is checked even for blocked locks.
*/
- if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, lock->basename);
+ if (__blocked_entrylk_conflict (dom, lock) &&
+ !(__owner_has_lock (dom, lock))) {
+ if (nonblock == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock is grantable, but blocking to prevent "
+ "starvation");
+ }
+ ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
goto out;
}
@@ -551,7 +672,8 @@ out:
void
__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct list_head *granted)
+ pl_dom_list_t *dom, struct list_head *granted,
+ struct timespec *now, struct list_head *contend)
{
int bl_ret = 0;
pl_entry_lock_t *bl = NULL;
@@ -566,7 +688,8 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
list_del_init (&bl->blocked_locks);
- bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom);
+ bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom, now,
+ contend);
if (bl_ret == 0) {
list_add (&bl->blocked_locks, granted);
@@ -578,7 +701,8 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grants locks if possible which are blocked on a lock */
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom)
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
struct list_head granted_list;
pl_entry_lock_t *tmp = NULL;
@@ -589,7 +713,7 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_lock (&pl_inode->mutex);
{
__grant_blocked_entry_locks (this, pl_inode, dom,
- &granted_list);
+ &granted_list, now, contend);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -610,8 +734,6 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
}
}
pthread_mutex_unlock (&pl_inode->mutex);
-
- return;
}
@@ -637,9 +759,18 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
int nonblock = 0;
gf_boolean_t need_inode_unref = _gf_false;
posix_locks_private_t *priv = NULL;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
+ struct timespec now = { };
priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
if (xdata)
dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
@@ -722,7 +853,8 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
{
reqlock->pinode = pinode;
- ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom);
+ ret = __lock_entrylk (this, pinode, reqlock, nonblock,
+ dom, &now, pcontend);
if (ret == 0) {
reqlock->frame = NULL;
op_ret = 0;
@@ -778,7 +910,7 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
if (ctx)
pthread_mutex_unlock (&ctx->lock);
- grant_blocked_entry_locks (this, pinode, dom);
+ grant_blocked_entry_locks (this, pinode, dom, &now, pcontend);
break;
@@ -810,6 +942,10 @@ unwind:
cmd, type);
}
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
+
return 0;
}
@@ -868,27 +1004,37 @@ pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
int
pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
+ posix_locks_private_t *priv;
pl_entry_lock_t *tmp = NULL;
pl_entry_lock_t *l = NULL;
- pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom = NULL;
pl_inode_t *pinode = NULL;
-
+ struct list_head *pcontend = NULL;
struct list_head released;
struct list_head unwind;
+ struct list_head contend;
+ struct timespec now = { };
INIT_LIST_HEAD (&released);
INIT_LIST_HEAD (&unwind);
- pthread_mutex_lock (&ctx->lock);
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
+ pthread_mutex_lock (&ctx->lock);
{
list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
- client_list) {
- pl_entrylk_log_cleanup (l);
+ client_list) {
+ pl_entrylk_log_cleanup (l);
- pinode = l->pinode;
+ pinode = l->pinode;
- pthread_mutex_lock (&pinode->mutex);
- {
+ pthread_mutex_lock (&pinode->mutex);
+ {
/* If the entrylk object is part of granted list but not
* blocked list, then perform the following actions:
* i. delete the object from granted list;
@@ -931,38 +1077,42 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
&unwind);
}
}
- pthread_mutex_unlock (&pinode->mutex);
+ pthread_mutex_unlock (&pinode->mutex);
}
- }
+ }
pthread_mutex_unlock (&ctx->lock);
list_for_each_entry_safe (l, tmp, &unwind, client_list) {
list_del_init (&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
- NULL);
+ if (l->frame)
+ STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
+ NULL);
list_add_tail (&l->client_list, &released);
}
list_for_each_entry_safe (l, tmp, &released, client_list) {
list_del_init (&l->client_list);
- pinode = l->pinode;
+ pinode = l->pinode;
- dom = get_domain (pinode, l->volume);
+ dom = get_domain (pinode, l->volume);
- grant_blocked_entry_locks (this, pinode, dom);
+ grant_blocked_entry_locks (this, pinode, dom, &now, pcontend);
- pthread_mutex_lock (&pinode->mutex);
- {
- __pl_entrylk_unref (l);
- }
- pthread_mutex_unlock (&pinode->mutex);
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ __pl_entrylk_unref (l);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
inode_unref (pinode->inode);
}
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
+
return 0;
}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 64ffb00c18c..890ac8b6d00 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -13,10 +13,12 @@
#include "logging.h"
#include "common-utils.h"
#include "list.h"
+#include "upcall-utils.h"
#include "locks.h"
#include "clear.h"
#include "common.h"
+#include "pl-messages.h"
void
__delete_inode_lock (pl_inode_lock_t *lock)
@@ -229,22 +231,134 @@ out:
return revoke_lock;
}
+static gf_boolean_t
+__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now)
+{
+ posix_locks_private_t *priv;
+ int64_t elapsed;
+
+ priv = this->private;
+
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+ return _gf_false;
+ }
+
+ elapsed = now->tv_sec;
+ elapsed -= lock->contention_time.tv_sec;
+ if (now->tv_nsec < lock->contention_time.tv_nsec) {
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+ return _gf_false;
+ }
+
+ /* All contention notifications will be sent outside of the locked
+ * region. This means that currently granted locks might have already
+ * been unlocked by that time. To avoid the lock or the inode to be
+ * destroyed before we process them, we take an additional reference
+ * on both. */
+ inode_ref(lock->pl_inode->inode);
+ __pl_inodelk_ref(lock);
+
+ lock->contention_time = *now;
+
+ return _gf_true;
+}
+
+void
+inodelk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+ struct gf_upcall up;
+ struct gf_upcall_inodelk_contention lc;
+ pl_inode_lock_t *lock;
+ pl_inode_t *pl_inode;
+ client_t *client;
+ gf_boolean_t notify;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, contend);
+
+ pl_inode = lock->pl_inode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* If the lock has already been released, no notification is
+ * sent. We clear the notification time in this case. */
+ notify = !list_empty(&lock->list);
+ if (!notify) {
+ lock->contention_time.tv_sec = 0;
+ lock->contention_time.tv_nsec = 0;
+ } else {
+ memcpy(&lc.flock, &lock->user_flock, sizeof(lc.flock));
+ lc.pid = lock->client_pid;
+ lc.domain = lock->volume;
+ lc.xdata = NULL;
+
+ gf_uuid_copy(up.gfid, lock->pl_inode->gfid);
+ client = (client_t *)lock->client;
+ if (client == NULL) {
+ /* A NULL client can be found if the inodelk
+ * was issued by a server side xlator. */
+ up.client_uid = NULL;
+ } else {
+ up.client_uid = client->client_uid;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (notify) {
+ up.event_type = GF_UPCALL_INODELK_CONTENTION;
+ up.data = &lc;
+
+ if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+ gf_msg_debug(this->name, 0,
+ "Inodelk contention notification "
+ "failed");
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Inodelk contention notification "
+ "sent");
+ }
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ list_del_init(&lock->contend);
+ __pl_inodelk_unref(lock);
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ inode_unref(pl_inode->inode);
+ }
+}
+
/* Determine if lock is grantable or not */
static pl_inode_lock_t *
-__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+__inodelk_grantable (xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
pl_inode_lock_t *l = NULL;
pl_inode_lock_t *ret = NULL;
- if (list_empty (&dom->inodelk_list))
- goto out;
+
list_for_each_entry (l, &dom->inodelk_list, list){
if (inodelk_conflict (lock, l) &&
!same_inodelk_owner (lock, l)) {
- ret = l;
- goto out;
+ if (ret == NULL) {
+ ret = l;
+ if (contend == NULL) {
+ break;
+ }
+ }
+ if (__inodelk_needs_contention_notify(this, l, now)) {
+ list_add_tail(&l->contend, contend);
+ }
}
}
-out:
+
return ret;
}
@@ -252,20 +366,14 @@ static pl_inode_lock_t *
__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock)
{
pl_inode_lock_t *l = NULL;
- pl_inode_lock_t *ret = NULL;
-
- if (list_empty (&dom->blocked_inodelks))
- return NULL;
list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
if (inodelk_conflict (lock, l)) {
- ret = l;
- goto out;
+ return l;
}
}
-out:
- return ret;
+ return NULL;
}
static int
@@ -286,35 +394,45 @@ __owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
return 0;
}
+static int
+__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ int can_block)
+{
+ struct timeval now;
+
+ gettimeofday(&now, NULL);
+
+ if (can_block == 0) {
+ goto out;
+ }
+
+ lock->blkd_time = now;
+ list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+
+ gf_msg_trace (this->name, 0, "%s (pid=%d) (lk-owner=%s) %"PRId64" - "
+ "%"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+
+out:
+ return -EAGAIN;
+}
/* Determines if lock can be granted and adds the lock. If the lock
* is blocking, adds it to the blocked_inodelks list of the domain.
*/
static int
__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+ int can_block, pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
pl_inode_lock_t *conf = NULL;
int ret = -EINVAL;
- conf = __inodelk_grantable (dom, lock);
+ conf = __inodelk_grantable (this, dom, lock, now, contend);
if (conf) {
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
-
+ ret = __lock_blocked_add(this, dom, lock, can_block);
goto out;
}
@@ -330,25 +448,15 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
* will not be unlocked by SHD from Machine1.
* TODO: Find why 'owner_has_lock' is checked even for blocked locks.
*/
- if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
+ if (__blocked_lock_conflict (dom, lock) &&
+ !(__owner_has_lock (dom, lock))) {
+ if (can_block != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock is grantable, but blocking to prevent "
+ "starvation");
+ }
+ ret = __lock_blocked_add(this, dom, lock, can_block);
goto out;
}
__pl_inodelk_ref (lock);
@@ -417,7 +525,8 @@ out:
static void
__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *granted, pl_dom_list_t *dom)
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend)
{
int bl_ret = 0;
pl_inode_lock_t *bl = NULL;
@@ -432,7 +541,8 @@ __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
list_del_init (&bl->blocked_locks);
- bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
+ bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom, now,
+ contend);
if (bl_ret == 0) {
list_add (&bl->blocked_locks, granted);
@@ -444,7 +554,8 @@ __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grant all inodelks blocked on a lock */
void
grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom)
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
struct list_head granted;
pl_inode_lock_t *lock;
@@ -454,7 +565,8 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_lock (&pl_inode->mutex);
{
- __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
+ __grant_blocked_inode_locks (this, pl_inode, &granted, dom,
+ now, contend);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -471,7 +583,7 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
&lock->user_flock, 0, 0, lock->volume);
STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
- lock->frame = NULL;
+ lock->frame = NULL;
}
pthread_mutex_lock (&pl_inode->mutex);
@@ -488,9 +600,9 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
static void
pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
{
- pl_inode_t *pl_inode = NULL;
+ pl_inode_t *pl_inode = NULL;
- pl_inode = lock->pl_inode;
+ pl_inode = lock->pl_inode;
gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by "
"{client=%p, pid=%"PRId64" lk-owner=%s}",
@@ -503,27 +615,38 @@ pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
int
pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
+ posix_locks_private_t *priv;
pl_inode_lock_t *tmp = NULL;
pl_inode_lock_t *l = NULL;
- pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom = NULL;
pl_inode_t *pl_inode = NULL;
-
+ struct list_head *pcontend = NULL;
struct list_head released;
struct list_head unwind;
+ struct list_head contend;
+ struct timespec now = { };
+
+ priv = this->private;
INIT_LIST_HEAD (&released);
INIT_LIST_HEAD (&unwind);
- pthread_mutex_lock (&ctx->lock);
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
+ pthread_mutex_lock (&ctx->lock);
{
list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,
- client_list) {
- pl_inodelk_log_cleanup (l);
+ client_list) {
+ pl_inodelk_log_cleanup (l);
- pl_inode = l->pl_inode;
+ pl_inode = l->pl_inode;
- pthread_mutex_lock (&pl_inode->mutex);
- {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
/* If the inodelk object is part of granted list but not
* blocked list, then perform the following actions:
* i. delete the object from granted list;
@@ -567,45 +690,49 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
&unwind);
}
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ pthread_mutex_unlock (&pl_inode->mutex);
}
- }
+ }
pthread_mutex_unlock (&ctx->lock);
list_for_each_entry_safe (l, tmp, &unwind, client_list) {
list_del_init (&l->client_list);
if (l->frame)
- STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
- NULL);
+ STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
+ NULL);
list_add_tail (&l->client_list, &released);
-
}
list_for_each_entry_safe (l, tmp, &released, client_list) {
list_del_init (&l->client_list);
- pl_inode = l->pl_inode;
+ pl_inode = l->pl_inode;
- dom = get_domain (pl_inode, l->volume);
+ dom = get_domain (pl_inode, l->volume);
- grant_blocked_inode_locks (this, pl_inode, dom);
+ grant_blocked_inode_locks (this, pl_inode, dom, &now,
+ pcontend);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __pl_inodelk_unref (l);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __pl_inodelk_unref (l);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
inode_unref (pl_inode->inode);
}
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
+
return 0;
}
static int
pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
- pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
inode_t *inode)
{
posix_locks_private_t *priv = NULL;
@@ -613,9 +740,12 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
pl_inode_lock_t *retlock = NULL;
gf_boolean_t unref = _gf_true;
gf_boolean_t need_inode_unref = _gf_false;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
+ struct timespec now = { };
short fl_type;
- lock->pl_inode = pl_inode;
+ lock->pl_inode = pl_inode;
fl_type = lock->fl_type;
priv = this->private;
@@ -657,12 +787,19 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
}
}
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
if (ctx)
pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pl_inode->mutex);
{
if (lock->fl_type != F_UNLCK) {
- ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
+ ret = __lock_inodelk (this, pl_inode, lock, can_block,
+ dom, &now, pcontend);
if (ret == 0) {
lock->frame = NULL;
gf_log (this->name, GF_LOG_TRACE,
@@ -725,13 +862,18 @@ out:
*/
if ((fl_type == F_UNLCK) && (ret == 0)) {
inode_unref (pl_inode->inode);
- grant_blocked_inode_locks (this, pl_inode, dom);
+ grant_blocked_inode_locks (this, pl_inode, dom, &now,
+ pcontend);
}
if (need_inode_unref) {
inode_unref (pl_inode->inode);
}
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
+
return ret;
}
@@ -771,7 +913,8 @@ new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
INIT_LIST_HEAD (&lock->list);
INIT_LIST_HEAD (&lock->blocked_locks);
- INIT_LIST_HEAD (&lock->client_list);
+ INIT_LIST_HEAD (&lock->client_list);
+ INIT_LIST_HEAD (&lock->contend);
__pl_inodelk_ref (lock);
return lock;
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 3d3b327f56c..c2edfff8f00 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -70,6 +70,7 @@ typedef struct __posix_lock posix_lock_t;
struct __pl_inode_lock {
struct list_head list;
struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
+ struct list_head contend; /* list of contending locks */
int ref;
short fl_type;
@@ -86,6 +87,8 @@ struct __pl_inode_lock {
struct timeval blkd_time; /*time at which lock was queued into blkd list*/
struct timeval granted_time; /*time at which lock was queued into active list*/
+ /*last time at wich lock contention was detected and notified*/
+ struct timespec contention_time;
/* These two together serve to uniquely identify each process
across nodes */
@@ -120,6 +123,7 @@ typedef struct _pl_dom_list pl_dom_list_t;
struct __entry_lock {
struct list_head domain_list; /* list_head back to pl_dom_list_t */
struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+ struct list_head contend; /* list of contending locks */
int ref;
call_frame_t *frame;
@@ -133,6 +137,8 @@ struct __entry_lock {
struct timeval blkd_time; /*time at which lock was queued into blkd list*/
struct timeval granted_time; /*time at which lock was queued into active list*/
+ /*last time at wich lock contention was detected and notified*/
+ struct timespec contention_time;
void *client;
gf_lkowner_t owner;
@@ -194,6 +200,8 @@ typedef struct {
uint32_t revocation_secs;
gf_boolean_t revocation_clear_all;
uint32_t revocation_max_blocked;
+ gf_boolean_t notify_contention;
+ uint32_t notify_contention_delay;
} posix_locks_private_t;
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
index 7a1e3f488e7..e5a276f35b5 100644
--- a/xlators/features/locks/src/pl-messages.h
+++ b/xlators/features/locks/src/pl-messages.h
@@ -24,7 +24,9 @@
*/
GLFS_MSGID(PL,
- PL_MSG_LOCK_NUMBER
+ PL_MSG_LOCK_NUMBER,
+ PL_MSG_INODELK_CONTENTION_FAILED,
+ PL_MSG_ENTRYLK_CONTENTION_FAILED
);
#endif /* !_PL_MESSAGES_H_ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 78bf160058c..82d77db1164 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -3641,6 +3641,13 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("revocation-max-blocked",
priv->revocation_max_blocked, options,
uint32, out);
+
+ GF_OPTION_RECONF ("notify-contention", priv->notify_contention,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("notify-contention-delay",
+ priv->notify_contention_delay, options, uint32, out);
+
ret = 0;
out:
@@ -3705,6 +3712,12 @@ init (xlator_t *this)
GF_OPTION_INIT ("revocation-max-blocked", priv->revocation_max_blocked,
uint32, out);
+ GF_OPTION_INIT ("notify-contention", priv->notify_contention, bool,
+ out);
+
+ GF_OPTION_INIT ("notify-contention-delay",
+ priv->notify_contention_delay, uint32, out);
+
this->local_pool = mem_pool_new (pl_local_t, 32);
if (!this->local_pool) {
ret = -1;
@@ -4461,5 +4474,32 @@ struct volume_options options[] = {
"will be revoked to allow the others to proceed. Can "
"be used in conjunction w/ revocation-clear-all."
},
+ { .key = {"notify-contention"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .op_version = { GD_OP_VERSION_4_0_0 },
+ .tags = { "locks", "contention" },
+ .description = "When this option is enabled and a lock request "
+ "conflicts with a currently granted lock, an upcall "
+ "notification will be sent to the current owner of "
+ "the lock to request it to be released as soon as "
+ "possible."
+ },
+ { .key = {"notify-contention-delay"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0, /* An upcall notification is sent every time a conflict is
+ * detected. */
+ .max = 60,
+ .default_value = "5",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .op_version = { GD_OP_VERSION_4_0_0 },
+ .tags = { "locks", "contention", "timeout" },
+ .description = "This value determines the minimum amount of time "
+ "(in seconds) between upcall contention notifications "
+ "on the same inode. If multiple lock requests are "
+ "received during this period, only one upcall will "
+ "be sent."
+ },
{ .key = {NULL} },
};