22 files changed, 1203 insertions, 264 deletions
diff --git a/libglusterfs/src/upcall-utils.h b/libglusterfs/src/upcall-utils.h
index 3b5dce33e45..48d10382c10 100644
--- a/libglusterfs/src/upcall-utils.h
+++ b/libglusterfs/src/upcall-utils.h
@@ -63,6 +63,8 @@ typedef enum {
         GF_UPCALL_EVENT_NULL,
         GF_UPCALL_CACHE_INVALIDATION,
         GF_UPCALL_RECALL_LEASE,
+        GF_UPCALL_INODELK_CONTENTION,
+        GF_UPCALL_ENTRYLK_CONTENTION,
 } gf_upcall_event_t;
 
 struct gf_upcall {
@@ -88,4 +90,19 @@ struct gf_upcall_recall_lease {
         dict_t   *dict;
 };
 
+struct gf_upcall_inodelk_contention {
+        struct gf_flock  flock;
+        pid_t            pid;
+        const char      *domain;
+        dict_t          *xdata;
+};
+
+struct gf_upcall_entrylk_contention {
+        uint32_t    type;
+        pid_t       pid;
+        const char *name;
+        const char *domain;
+        dict_t     *xdata;
+};
+
 #endif /* _UPCALL_UTILS_H */
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index aee34302205..aafd94400c6 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -150,6 +150,8 @@ enum gf_cbk_procnum {
         GF_CBK_CHILD_DOWN,
         GF_CBK_RECALL_LEASE,
         GF_CBK_STATEDUMP,
+        GF_CBK_INODELK_CONTENTION,
+        GF_CBK_ENTRYLK_CONTENTION,
         GF_CBK_MAXVALUE,
 };
 
diff --git a/rpc/xdr/src/glusterfs3.h b/rpc/xdr/src/glusterfs3.h
index 2da5594a347..eef39416b5c 100644
--- a/rpc/xdr/src/glusterfs3.h
+++ b/rpc/xdr/src/glusterfs3.h
@@ -419,6 +419,162 @@ gf_proto_cache_invalidation_to_upcall (xlator_t *this,
         return ret;
 }
 
+static inline int
+gf_proto_inodelk_contention_to_upcall (struct gfs4_inodelk_contention_req *lc,
+                                       struct gf_upcall *gf_up_data)
+{
+        struct gf_upcall_inodelk_contention *tmp = NULL;
+        xlator_t *this                           = NULL;
+        int    ret                               = -1;
+        int    op_errno                          = EINVAL;
+
+        this = THIS;
+
+        GF_VALIDATE_OR_GOTO(this->name, lc, out);
+        GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+        tmp = (struct gf_upcall_inodelk_contention *)gf_up_data->data;
+
+        gf_uuid_copy(gf_up_data->gfid, (unsigned char *)lc->gfid);
+
+        gf_proto_flock_to_flock(&lc->flock, &tmp->flock);
+        tmp->pid = lc->pid;
+        tmp->domain = lc->domain;
+        if ((tmp->domain != NULL) && (*tmp->domain == 0)) {
+                tmp->domain = NULL;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, tmp->xdata, lc->xdata.xdata_val,
+                                      lc->xdata.xdata_len, ret, op_errno, out);
+
+        ret = 0;
+
+out:
+        if (ret < 0) {
+                ret = -op_errno;
+        }
+
+        return ret;
+}
+
+static inline int
+gf_proto_inodelk_contention_from_upcall (xlator_t *this,
+                                         struct gfs4_inodelk_contention_req *lc,
+                                         struct gf_upcall *gf_up_data)
+{
+        struct gf_upcall_inodelk_contention *tmp = NULL;
+        int    ret                               = -1;
+        int    op_errno                          = EINVAL;
+
+        GF_VALIDATE_OR_GOTO(this->name, lc, out);
+        GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+        tmp = (struct gf_upcall_inodelk_contention *)gf_up_data->data;
+
+        gf_uuid_copy((unsigned char *)lc->gfid, gf_up_data->gfid);
+
+        gf_proto_flock_from_flock(&lc->flock, &tmp->flock);
+        lc->pid = tmp->pid;
+        lc->domain = (char *)tmp->domain;
+        if (lc->domain == NULL) {
+                lc->domain = "";
+        }
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, tmp->xdata, &lc->xdata.xdata_val,
+                                    lc->xdata.xdata_len, op_errno, out);
+
+        ret = 0;
+
+out:
+        if (ret < 0) {
+                ret = -op_errno;
+        }
+
+        return ret;
+}
+
+static inline int
+gf_proto_entrylk_contention_to_upcall (struct gfs4_entrylk_contention_req *lc,
+                                       struct gf_upcall *gf_up_data)
+{
+        struct gf_upcall_entrylk_contention *tmp = NULL;
+        xlator_t *this                           = NULL;
+        int    ret                               = -1;
+        int    op_errno                          = EINVAL;
+
+        this = THIS;
+
+        GF_VALIDATE_OR_GOTO(this->name, lc, out);
+        GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+        tmp = (struct gf_upcall_entrylk_contention *)gf_up_data->data;
+
+        gf_uuid_copy(gf_up_data->gfid, (unsigned char *)lc->gfid);
+
+        tmp->type = lc->type;
+        tmp->name = lc->name;
+        if ((tmp->name != NULL) && (*tmp->name == 0)) {
+                tmp->name = NULL;
+        }
+        tmp->pid = lc->pid;
+        tmp->domain = lc->domain;
+        if ((tmp->domain != NULL) && (*tmp->domain == 0)) {
+                tmp->domain = NULL;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, tmp->xdata, lc->xdata.xdata_val,
+                                      lc->xdata.xdata_len, ret, op_errno, out);
+
+        ret = 0;
+
+out:
+        if (ret < 0) {
+                ret = -op_errno;
+        }
+
+        return ret;
+}
+
+static inline int
+gf_proto_entrylk_contention_from_upcall (xlator_t *this,
+                                         struct gfs4_entrylk_contention_req *lc,
+                                         struct gf_upcall *gf_up_data)
+{
+        struct gf_upcall_entrylk_contention *tmp = NULL;
+        int    ret                               = -1;
+        int    op_errno                          = EINVAL;
+
+        GF_VALIDATE_OR_GOTO(this->name, lc, out);
+        GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+        tmp = (struct gf_upcall_entrylk_contention *)gf_up_data->data;
+
+        gf_uuid_copy((unsigned char *)lc->gfid, gf_up_data->gfid);
+
+        lc->type = tmp->type;
+        lc->name = (char *)tmp->name;
+        if (lc->name == NULL) {
+                lc->name = "";
+        }
+        lc->pid = tmp->pid;
+        lc->domain = (char *)tmp->domain;
+        if (lc->domain == NULL) {
+                lc->domain = "";
+        }
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, tmp->xdata, &lc->xdata.xdata_val,
+                                    lc->xdata.xdata_len, op_errno, out);
+
+        ret = 0;
+
+out:
+        if (ret < 0) {
+                ret = -op_errno;
+        }
+
+        return ret;
+}
+
 extern int dict_to_xdr (dict_t *this, gfx_dict *xdict);
 extern int xdr_to_dict (gfx_dict *xdict, dict_t **to);
 
diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x
index 7396b566fa7..ef0cfde0802 100644
--- a/rpc/xdr/src/glusterfs4-xdr.x
+++ b/rpc/xdr/src/glusterfs4-xdr.x
@@ -86,3 +86,20 @@ struct gfs4_namelink_req {
        string bname<>;
        opaque xdata<>;
 };
+
+struct gfs4_inodelk_contention_req {
+        opaque                gfid[16];
+        struct gf_proto_flock flock;
+        unsigned int          pid;
+        string                domain<>;
+        opaque                xdata<>;
+};
+
+struct gfs4_entrylk_contention_req {
+        opaque                gfid[16];
+        unsigned int          type;
+        unsigned int          pid;
+        string                name<>;
+        string                domain<>;
+        opaque                xdata<>;
+};
diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym
index 8af956ef5a9..83f1efc732a 100644
--- a/rpc/xdr/src/libgfxdr.sym
+++ b/rpc/xdr/src/libgfxdr.sym
@@ -155,8 +155,12 @@ xdr_gfs3_xattrop_req
 xdr_gfs3_xattrop_rsp
 xdr_gfs3_zerofill_req
 xdr_gfs3_zerofill_rsp
+xdr_gfs4_entrylk_contention_req
+xdr_gfs4_entrylk_contention_rsp
 xdr_gfs4_icreate_req
 xdr_gfs4_icreate_rsp
+xdr_gfs4_inodelk_contention_req
+xdr_gfs4_inodelk_contention_rsp
 xdr_gfs4_namelink_req
 xdr_gfs4_namelink_rsp
 xdr_gf_set_lk_ver_req
diff --git a/tests/basic/ec/lock-contention.t b/tests/basic/ec/lock-contention.t
new file mode 100644
index 00000000000..8f86cee16ad
--- /dev/null
+++ b/tests/basic/ec/lock-contention.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# This test verifies that when 'lock-notify-contention' option is enabled,
+# locks xlator actually sends an upcall notification that causes the acquired
+# lock from one client to be released before it's supposed to when another
+# client accesses the file.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function elapsed_time() {
+        local start="`date +%s`"
+
+        if [[ "test" == `cat "$1"` ]]; then
+                echo "$((`date +%s` - ${start}))"
+        fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 features.locks-notify-contention off
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 6
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 6
+TEST $CLI volume start $V0
+
+TEST $GFS --direct-io-mode=yes --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
+
+TEST $GFS --direct-io-mode=yes --volfile-id=/$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M1
+
+TEST $(echo "test" >$M0/file)
+
+# With locks-notify-contention set to off, accessing the file from another
+# client should take 6 seconds. Checking against 3 seconds to be safe.
+elapsed="$(elapsed_time $M1/file)"
+TEST [[ ${elapsed} -ge 3 ]]
+
+elapsed="$(elapsed_time $M0/file)"
+TEST [[ ${elapsed} -ge 3 ]]
+
+TEST $CLI volume set $V0 features.locks-notify-contention on
+
+# With locks-notify-contention set to on, accessing the file from another
+# client should be fast. Checking against 3 seconds to be safe.
+elapsed="$(elapsed_time $M1/file)"
+TEST [[ ${elapsed} -le 3 ]]
+
+elapsed="$(elapsed_time $M0/file)"
+TEST [[ ${elapsed} -le 3 ]]
+
+cleanup
diff --git a/tests/volume.rc b/tests/volume.rc
index 3a48b43d2e0..3ee83624058 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -93,7 +93,8 @@ function remove_brick_status_completed_field {
 
 function get_mount_process_pid {
         local vol=$1
-        ps auxww | grep glusterfs | grep -E "volfile-id[ =]/?$vol " | awk '{print $2}' | head -1
+        local mnt=$2
+        ps auxww | grep glusterfs | grep -E "volfile-id[ =]/?$vol .*$mnt" | awk '{print $2}' | head -1
 }
 
 function get_nfs_pid ()
@@ -126,7 +127,8 @@ function generate_statedump {
 
 function generate_mount_statedump {
         local vol=$1
-        generate_statedump $(get_mount_process_pid $vol)
+        local mnt=$2
+        generate_statedump $(get_mount_process_pid $vol $mnt)
 }
 
 function cleanup_mount_statedump {
@@ -205,14 +207,16 @@ function ec_child_up_status {
         local vol=$1
         local dist_id=$2
         local brick_id=$(($3 + 1))
-        local mask=$(ec_get_info $vol $dist_id "childs_up_mask" $(generate_mount_statedump $vol))
+        local mnt=$4
+        local mask=$(ec_get_info $vol $dist_id "childs_up_mask" $(generate_mount_statedump $vol $mnt))
         echo "${mask: -$brick_id:1}"
 }
 
 function ec_child_up_count {
         local vol=$1
         local dist_id=$2
-        ec_get_info $vol $dist_id "childs_up" $(generate_mount_statedump $vol)
+        local mnt=$3
+        ec_get_info $vol $dist_id "childs_up" $(generate_mount_statedump $vol $mnt)
 }
 
 function ec_child_up_status_shd {
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index cb627a92c9c..fbc7ac97aa0 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -1847,6 +1847,67 @@ gf_boolean_t ec_lock_acquire(ec_lock_link_t *link)
     return _gf_true;
 }
 
+static ec_lock_link_t *
+ec_lock_timer_cancel(xlator_t *xl, ec_lock_t *lock)
+{
+        ec_lock_link_t *timer_link;
+
+        /* If we don't have any timer, there's nothing to cancel. */
+        if (lock->timer == NULL) {
+                return NULL;
+        }
+
+        /* We are trying to access a lock that has an unlock timer active.
+         * This means that the lock must be idle, i.e. no fop can be in the
+         * owner, waiting or frozen lists. It also means that the lock cannot
+         * have been marked as being released (this is done without timers).
+         * There should only be one owner reference, but it's possible that
+         * some fops are being prepared to use this lock. */
+        GF_ASSERT ((lock->refs_owners == 1) &&
+                   list_empty(&lock->owners) && list_empty(&lock->waiting));
+
+        /* We take the timer_link before cancelling the timer, since a
+         * successful cancellation will destroy it. It must not be NULL
+         * because it references the fop responsible for the delayed unlock
+         * that we are currently trying to cancel. */
+        timer_link = lock->timer->data;
+        GF_ASSERT(timer_link != NULL);
+
+        if (gf_timer_call_cancel(xl->ctx, lock->timer) < 0) {
+                /* It's too late to avoid the execution of the timer callback.
+                 * Since we need to be sure that the callback has access to all
+                 * needed resources, we cannot resume the execution of the
+                 * timer fop now. This will be done in the callback. */
+                timer_link = NULL;
+        } else {
+                /* The timer has been cancelled. The fop referenced by
+                 * timer_link holds the last reference. The caller is
+                 * responsible to release it when not needed anymore. */
+                ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
+        }
+
+        /* We have two options here:
+         *
+         * 1. The timer has been successfully cancelled.
+         *
+         *    This is the easiest case and we can continue with the currently
+         *    acquired lock.
+         *
+         * 2. The timer callback has already been fired.
+         *
+         *    In this case we have not been able to cancel the timer before
+         *    the timer callback has been fired, but we also know that
+         *    lock->timer != NULL. This means that the timer callback is still
+         *    trying to acquire the inode mutex that we currently own. We are
+         *    safe until we release it. In this case we can safely clear
+         *    lock->timer. This will cause that the timer callback does nothing
+         *    once it acquires the mutex.
+         */
+        lock->timer = NULL;
+
+        return timer_link;
+}
+
 static gf_boolean_t
 ec_lock_assign_owner(ec_lock_link_t *link)
 {
@@ -1891,61 +1952,7 @@ ec_lock_assign_owner(ec_lock_link_t *link)
      * empty. */
     GF_ASSERT(list_empty(&lock->frozen));
 
-    if (lock->timer != NULL) {
-        /* We are trying to acquire a lock that has an unlock timer active.
-         * This means that the lock must be idle, i.e. no fop can be in the
-         * owner, waiting or frozen lists. It also means that the lock cannot
-         * have been marked as being released (this is done without timers).
-         * There should only be one owner reference, but it's possible that
-         * some fops are being prepared to use this lock.
-         */
-        GF_ASSERT ((lock->refs_owners == 1) &&
-                   list_empty(&lock->owners) && list_empty(&lock->waiting));
-
-        /* We take the timer_link before cancelling the timer, since a
-         * successful cancellation will destroy it. It must not be NULL
-         * because it references the fop responsible for the delayed unlock
-         * that we are currently trying to cancel. */
-        timer_link = lock->timer->data;
-        GF_ASSERT(timer_link != NULL);
-
-        if (gf_timer_call_cancel(fop->xl->ctx, lock->timer) < 0) {
-            /* It's too late to avoid the execution of the timer callback.
-             * Since we need to be sure that the callback has access to all
-             * needed resources, we cannot resume the execution of the timer
-             * fop now. This will be done in the callback.
-             */
-            timer_link = NULL;
-        } else {
-            /* The timer has been cancelled, so we need to release the owner
-             * reference that was held by the fop waiting for the timer. This
-             * can be the last reference, but we'll immediately increment it
-             * for the current fop, so no need to check it.
-             */
-            lock->refs_owners--;
-
-            ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
-        }
-
-        /* We have two options here:
-         *
-         * 1. The timer has been successfully cancelled.
-         *
-         *    This is the easiest case and we can continue with the currently
-         *    acquired lock.
-         *
-         * 2. The timer callback has already been fired.
-         *
-         *    In this case we have not been able to cancel the timer before
-         *    the timer callback has been fired, but we also know that
-         *    lock->timer != NULL. This means that the timer callback is still
-         *    trying to acquire the inode mutex that we currently own. We are
-         *    safe until we release it. In this case we can safely clear
-         *    lock->timer. This will cause that the timer callback does nothing
-         *    once it acquires the mutex.
-         */
-        lock->timer = NULL;
-    }
+    timer_link = ec_lock_timer_cancel(fop->xl, lock);
 
     if (!list_empty(&lock->owners)) {
         /* There are other owners of this lock. We can only take ownership if
@@ -1965,7 +1972,13 @@ ec_lock_assign_owner(ec_lock_link_t *link)
     }
 
     list_add_tail(&link->owner_list, &lock->owners);
-    lock->refs_owners++;
+
+    /* If timer_link is not NULL, it means that we have inherited the owner
+     * reference assigned to the timer fop. In this case we simply reuse it.
+     * Otherwise we need to increase the number of owners. */
+    if (timer_link == NULL) {
+            lock->refs_owners++;
+    }
 
     assigned = _gf_true;
 
@@ -2383,6 +2396,48 @@ ec_unlock_now(ec_lock_link_t *link)
     ec_resume(link->fop, 0);
 }
 
+void
+ec_lock_release(ec_t *ec, inode_t *inode)
+{
+        ec_lock_t *lock;
+        ec_inode_t *ctx;
+        ec_lock_link_t *timer_link = NULL;
+
+        LOCK(&inode->lock);
+
+        ctx = __ec_inode_get(inode, ec->xl);
+        if (ctx == NULL) {
+                goto done;
+        }
+        lock = ctx->inode_lock;
+        if ((lock == NULL) || !lock->acquired || lock->release) {
+                goto done;
+        }
+
+        gf_msg_debug(ec->xl->name, 0,
+                     "Releasing inode %p due to lock contention", inode);
+
+        /* The lock is not marked to be released, so the frozen list should be
+         * empty. */
+        GF_ASSERT(list_empty(&lock->frozen));
+
+        timer_link = ec_lock_timer_cancel(ec->xl, lock);
+
+        /* We mark the lock to be released as soon as possible. */
+        lock->release = _gf_true;
+
+done:
+        UNLOCK(&inode->lock);
+
+        /* If we have cancelled the timer, we need to start the unlock of the
+         * inode. If there was a timer but we have been unable to cancel it
+         * because it was just triggered, the timer callback will take care
+         * of releasing the inode. */
+        if (timer_link != NULL) {
+                ec_unlock_now(timer_link);
+        }
+}
+
 void ec_unlock_timer_add(ec_lock_link_t *link);
 
 void
@@ -2470,9 +2525,60 @@ void ec_unlock_timer_cbk(void *data)
         ec_unlock_timer_del(data);
 }
 
+static gf_boolean_t
+ec_eager_lock_used(ec_t *ec, ec_fop_data_t *fop)
+{
+        /* Fops with no locks at this point mean that they are sent as sub-fops
+         * of other higher level fops. In this case we simply assume that the
+         * parent fop will take correct care of the eager lock. */
+        if (fop->lock_count == 0) {
+                return _gf_true;
+        }
+
+        /* We may have more than one lock, but this only happens in the rename
+         * fop, and both locks will reference an inode of the same type (a
+         * directory in this case), so we only need to check the first lock. */
+        if (fop->locks[0].lock->loc.inode->ia_type == IA_IFREG) {
+                return ec->eager_lock;
+        }
+
+        return ec->other_eager_lock;
+}
+
+static uint32_t
+ec_eager_lock_timeout(ec_t *ec, ec_lock_t *lock)
+{
+        if (lock->loc.inode->ia_type == IA_IFREG) {
+                return ec->eager_lock_timeout;
+        }
+
+        return ec->other_eager_lock_timeout;
+}
+
+static gf_boolean_t
+ec_lock_delay_create(ec_lock_link_t *link)
+{
+        struct timespec delay;
+        ec_fop_data_t *fop = link->fop;
+        ec_lock_t *lock = link->lock;
+
+        delay.tv_sec = ec_eager_lock_timeout(fop->xl->private, lock);
+        delay.tv_nsec = 0;
+        lock->timer = gf_timer_call_after(fop->xl->ctx, delay,
+                                          ec_unlock_timer_cbk, link);
+        if (lock->timer == NULL) {
+                gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM,
+                       EC_MSG_UNLOCK_DELAY_FAILED,
+                       "Unable to delay an unlock");
+
+                return _gf_false;
+        }
+
+        return _gf_true;
+}
+
 void ec_unlock_timer_add(ec_lock_link_t *link)
 {
-    struct timespec delay;
     ec_fop_data_t *fop = link->fop;
     ec_lock_t *lock = link->lock;
     gf_boolean_t now = _gf_false;
@@ -2526,19 +2632,12 @@ void ec_unlock_timer_add(ec_lock_link_t *link)
             ec_trace("UNLOCK_DELAY", fop, "lock=%p, release=%d", lock,
                      lock->release);
 
-            delay.tv_sec = 1;
-            delay.tv_nsec = 0;
-            lock->timer = gf_timer_call_after(fop->xl->ctx, delay,
-                                              ec_unlock_timer_cbk, link);
-            if (lock->timer == NULL) {
-                gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM,
-                       EC_MSG_UNLOCK_DELAY_FAILED,
-                       "Unable to delay an unlock");
-
+            if (!ec_lock_delay_create(link)) {
                 /* We are unable to create a new timer. We immediately release
                  * the lock. */
                 lock->release = now = _gf_true;
             }
+
         } else {
             ec_trace("UNLOCK_FORCE", fop, "lock=%p, release=%d", lock,
                      lock->release);
@@ -2583,26 +2682,6 @@ void ec_flush_size_version(ec_fop_data_t * fop)
     ec_update_info(&fop->locks[0]);
 }
 
-static gf_boolean_t
-ec_use_eager_lock(ec_t *ec, ec_fop_data_t *fop)
-{
-        /* Fops with no locks at this point mean that they are sent as sub-fops
-         * of other higher level fops. In this case we simply assume that the
-         * parent fop will take correct care of the eager lock. */
-        if (fop->lock_count == 0) {
-                return _gf_true;
-        }
-
-        /* We may have more than one lock, but this only happens in the rename
-         * fop, and both locks will reference an inode of the same type (a
-         * directory in this case), so we only need to check the first lock. */
-        if (fop->locks[0].lock->loc.inode->ia_type == IA_IFREG) {
-                return ec->eager_lock;
-        }
-
-        return ec->other_eager_lock;
-}
-
 static void
 ec_update_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache, ec_stripe_t *stripe,
                  ec_fop_data_t *fop)
@@ -2708,7 +2787,7 @@ void ec_lock_reuse(ec_fop_data_t *fop)
     ec = fop->xl->private;
     cbk = fop->answer;
 
-    if (ec_use_eager_lock(ec, fop) && cbk != NULL) {
+    if (ec_eager_lock_used(ec, fop) && cbk != NULL) {
         if (cbk->xdata != NULL) {
             if ((dict_get_int32(cbk->xdata, GLUSTERFS_INODELK_COUNT,
                                 &count) == 0) && (count > 1)) {
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index c3e291585ef..99e2f0653be 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -102,6 +102,7 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags,
 void ec_lock(ec_fop_data_t * fop);
 void ec_lock_reuse(ec_fop_data_t *fop);
 void ec_unlock(ec_fop_data_t * fop);
+void ec_lock_release(ec_t *ec, inode_t *inode);
 
 gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
                                uint64_t *size);
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 23dc434bc42..15b4c77abfe 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -669,6 +669,8 @@ struct _ec {
     uint32_t           background_heals;
     uint32_t           heal_wait_qlen;
     uint32_t           self_heal_window_size; /* max size of read/writes */
+    uint32_t           eager_lock_timeout;
+    uint32_t           other_eager_lock_timeout;
     struct list_head   pending_fops;
     struct list_head   heal_waiting;
     struct list_head   healing;
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 4c80f1283f1..30b0bdcb29c 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -271,6 +271,11 @@ reconfigure (xlator_t *this, dict_t *options)
                           bool, failed);
         GF_OPTION_RECONF ("other-eager-lock", ec->other_eager_lock, options,
                           bool, failed);
+        GF_OPTION_RECONF ("eager-lock-timeout", ec->eager_lock_timeout,
+                          options, uint32, failed);
+        GF_OPTION_RECONF ("other-eager-lock-timeout",
+                          ec->other_eager_lock_timeout, options, uint32,
+                          failed);
         GF_OPTION_RECONF ("background-heals", background_heals, options,
                           uint32, failed);
         GF_OPTION_RECONF ("heal-wait-qlength", heal_wait_qlen, options,
@@ -453,6 +458,43 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
         }
 }
 
+static gf_boolean_t
+ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+{
+        struct gf_upcall_cache_invalidation *ci = NULL;
+        struct gf_upcall_inodelk_contention *lc = NULL;
+        inode_t *inode;
+
+        switch (upcall->event_type) {
+        case GF_UPCALL_CACHE_INVALIDATION:
+                ci = upcall->data;
+                ci->flags |= UP_INVAL_ATTR;
+                return _gf_true;
+
+        case GF_UPCALL_INODELK_CONTENTION:
+                lc = upcall->data;
+                if (strcmp(lc->domain, ec->xl->name) != 0) {
+                        /* The lock is not owned by EC, ignore it. */
+                        return _gf_true;
+                }
+                inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
+                                   upcall->gfid);
+                /* If inode is not found, it means that it's already released,
+                 * so we can ignore it. Probably it has been released and
+                 * destroyed while the contention notification was being sent.
+                 */
+                if (inode != NULL) {
+                        ec_lock_release(ec, inode);
+                        inode_unref(inode);
+                }
+
+                return _gf_false;
+
+        default:
+                return _gf_true;
+        }
+}
+
 int32_t
 ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
 {
@@ -464,19 +506,13 @@ ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
         dict_t            *output   = NULL;
         gf_boolean_t      propagate = _gf_true;
         int32_t           orig_event = event;
-        struct gf_upcall *up_data   = NULL;
-        struct gf_upcall_cache_invalidation *up_ci = NULL;
         uintptr_t mask = 0;
 
         gf_msg_trace (this->name, 0, "NOTIFY(%d): %p, %p",
                 event, data, data2);
 
         if (event == GF_EVENT_UPCALL) {
-                up_data = (struct gf_upcall *)data;
-                if (up_data->event_type == GF_UPCALL_CACHE_INVALIDATION) {
-                        up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
-                        up_ci->flags |= UP_INVAL_ATTR;
-                }
+                propagate = ec_upcall(ec, data);
                 goto done;
         }
 
@@ -664,6 +700,10 @@ init (xlator_t *this)
     GF_OPTION_INIT ("iam-self-heal-daemon", ec->shd.iamshd, bool, failed);
     GF_OPTION_INIT ("eager-lock", ec->eager_lock, bool, failed);
     GF_OPTION_INIT ("other-eager-lock", ec->other_eager_lock, bool, failed);
+    GF_OPTION_INIT ("eager-lock-timeout", ec->eager_lock_timeout, uint32,
+                    failed);
+    GF_OPTION_INIT ("other-eager-lock-timeout", ec->other_eager_lock_timeout,
+                    uint32, failed);
     GF_OPTION_INIT ("background-heals", ec->background_heals, uint32, failed);
     GF_OPTION_INIT ("heal-wait-qlength", ec->heal_wait_qlen, uint32, failed);
     GF_OPTION_INIT ("self-heal-window-size", ec->self_heal_window_size, uint32,
@@ -1456,6 +1496,29 @@ struct volume_options options[] =
       .description = "It's equivalent to the eager-lock option but for non "
                      "regular files."
     },
+    { .key = {"eager-lock-timeout"},
+      .type = GF_OPTION_TYPE_INT,
+      .min = 1,
+      .max = 60,
+      .default_value = "1",
+      .op_version = { GD_OP_VERSION_4_0_0 },
+      .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+      .tags = { "disperse", "locks", "timeout" },
+      .description = "Maximum time (in seconds) that a lock on an inode is "
+                     "kept held if no new operations on the inode are "
+                     "received."
+    },
+    { .key = {"other-eager-lock-timeout"},
+      .type = GF_OPTION_TYPE_INT,
+      .min = 1,
+      .max = 60,
+      .default_value = "1",
+      .op_version = { GD_OP_VERSION_4_0_0 },
+      .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+      .tags = { "disperse", "locks", "timeout" },
+      .description = "It's equivalent ot eager-lock-timeout option but for "
+                     "non regular files."
+    },
     { .key = {"background-heals"},
       .type = GF_OPTION_TYPE_INT,
       .min = 0,/*Disabling background heals*/
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index a76d6beacb1..1609fc416d2 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -200,6 +200,7 @@ int
 clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
                      clrlk_args *args, int *blkd, int *granted, int *op_errno)
 {
+        posix_locks_private_t   *priv;
         pl_inode_lock_t         *ilock          = NULL;
         pl_inode_lock_t         *tmp            = NULL;
         struct gf_flock         ulock           = {0, };
@@ -207,9 +208,20 @@ clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
         int                     bcount          = 0;
         int                     gcount          = 0;
         gf_boolean_t            chk_range       = _gf_false;
+        struct list_head        *pcontend       = NULL;
         struct list_head        released;
+        struct list_head        contend;
+        struct timespec         now = { };
 
         INIT_LIST_HEAD (&released);
+
+        priv = this->private;
+        if (priv->notify_contention) {
+                pcontend = &contend;
+                INIT_LIST_HEAD (pcontend);
+                timespec_now(&now);
+        }
+
         if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
                 *op_errno = EINVAL;
                 goto out;
@@ -283,7 +295,10 @@ granted:
 
         ret = 0;
 out:
-        grant_blocked_inode_locks (this, pl_inode, dom);
+        grant_blocked_inode_locks (this, pl_inode, dom, &now, pcontend);
+        if (pcontend != NULL) {
+                inodelk_contention_notify(this, pcontend);
+        }
         *blkd    = bcount;
         *granted = gcount;
         return ret;
@@ -294,15 +309,27 @@ int
 clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
                      clrlk_args *args, int *blkd, int *granted, int *op_errno)
 {
+        posix_locks_private_t   *priv;
         pl_entry_lock_t         *elock          = NULL;
         pl_entry_lock_t         *tmp            = NULL;
         int                     bcount          = 0;
         int                     gcount          = 0;
         int                     ret             = -1;
+        struct list_head        *pcontend       = NULL;
         struct list_head        removed;
         struct list_head        released;
+        struct list_head        contend;
+        struct timespec         now;
 
         INIT_LIST_HEAD (&released);
+
+        priv = this->private;
+        if (priv->notify_contention) {
+                pcontend = &contend;
+                INIT_LIST_HEAD (pcontend);
+                timespec_now(&now);
+        }
+
         if (args->kind & CLRLK_BLOCKED)
                 goto blkd;
 
@@ -361,12 +388,15 @@ granted:
                         list_del_init (&elock->domain_list);
                         list_add_tail (&elock->domain_list, &removed);
 
-			__pl_entrylk_unref (elock);
+                        __pl_entrylk_unref (elock);
                 }
         }
         pthread_mutex_unlock (&pl_inode->mutex);
 
-	grant_blocked_entry_locks (this, pl_inode, dom);
+        grant_blocked_entry_locks (this, pl_inode, dom, &now, pcontend);
+        if (pcontend != NULL) {
+                entrylk_contention_notify(this, pcontend);
+        }
 
         ret = 0;
 out:
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index 3729ca24bed..50c156feb38 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -69,7 +69,11 @@ get_domain (pl_inode_t *pl_inode, const char *volume);
 
 void
 grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
-                           pl_dom_list_t *dom);
+                           pl_dom_list_t *dom, struct timespec *now,
+                           struct list_head *contend);
+
+void
+inodelk_contention_notify (xlator_t *this, struct list_head *contend);
 
 void
 __delete_inode_lock (pl_inode_lock_t *lock);
@@ -79,7 +83,11 @@ __pl_inodelk_unref (pl_inode_lock_t *lock);
 
 void
 grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
-                           pl_dom_list_t *dom);
+                           pl_dom_list_t *dom, struct timespec *now,
+                           struct list_head *contend);
+
+void
+entrylk_contention_notify (xlator_t *this, struct list_head *contend);
 
 void pl_update_refkeeper (xlator_t *this, inode_t *inode);
 
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 6698516fc83..008d05a34c4 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -13,17 +13,19 @@
 #include "logging.h"
 #include "common-utils.h"
 #include "list.h"
+#include "upcall-utils.h"
 
 #include "locks.h"
 #include "clear.h"
 #include "common.h"
+#include "pl-messages.h"
 
 void
 __pl_entrylk_unref (pl_entry_lock_t *lock)
 {
         lock->ref--;
         if (!lock->ref) {
-		GF_FREE ((char *)lock->basename);
+                GF_FREE ((char *)lock->basename);
                 GF_FREE (lock->connection_id);
                 GF_FREE (lock);
         }
@@ -39,7 +41,7 @@ __pl_entrylk_ref (pl_entry_lock_t *lock)
 
 static pl_entry_lock_t *
 new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
-		  const char *domain, call_frame_t *frame, char *conn_id)
+                  const char *domain, call_frame_t *frame, char *conn_id)
 {
         pl_entry_lock_t *newlock = NULL;
 
@@ -55,7 +57,7 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
         newlock->client_pid = frame->root->pid;
         newlock->volume     = domain;
         newlock->owner      = frame->root->lk_owner;
-	newlock->frame      = frame;
+        newlock->frame      = frame;
         newlock->this       = frame->this;
 
         if (conn_id) {
@@ -64,9 +66,9 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
 
         INIT_LIST_HEAD (&newlock->domain_list);
         INIT_LIST_HEAD (&newlock->blocked_locks);
-	INIT_LIST_HEAD (&newlock->client_list);
+        INIT_LIST_HEAD (&newlock->client_list);
 
-	__pl_entrylk_ref (newlock);
+        __pl_entrylk_ref (newlock);
 out:
         return newlock;
 }
@@ -201,6 +203,113 @@ out:
         return revoke_lock;
 }
 
+static gf_boolean_t
+__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
+                                  struct timespec *now)
+{
+        posix_locks_private_t *priv;
+        int64_t elapsed;
+
+        priv = this->private;
+
+        /* If this lock is in a list, it means that we are about to send a
+         * notification for it, so no need to do anything else. */
+        if (!list_empty(&lock->contend)) {
+                return _gf_false;
+        }
+
+        elapsed = now->tv_sec;
+        elapsed -= lock->contention_time.tv_sec;
+        if (now->tv_nsec < lock->contention_time.tv_nsec) {
+                elapsed--;
+        }
+        if (elapsed < priv->notify_contention_delay) {
+                return _gf_false;
+        }
+
+        /* All contention notifications will be sent outside of the locked
+         * region. This means that currently granted locks might have already
+         * been unlocked by that time. To avoid the lock or the inode to be
+         * destroyed before we process them, we take an additional reference
+         * on both. */
+        inode_ref(lock->pinode->inode);
+        __pl_entrylk_ref(lock);
+
+        lock->contention_time = *now;
+
+        return _gf_true;
+}
+
+void
+entrylk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+        struct gf_upcall up;
+        struct gf_upcall_entrylk_contention lc;
+        pl_entry_lock_t *lock;
+        pl_inode_t *pl_inode;
+        client_t *client;
+        gf_boolean_t notify;
+
+        while (!list_empty(contend)) {
+                lock = list_first_entry(contend, pl_entry_lock_t, contend);
+
+                pl_inode = lock->pinode;
+
+                pthread_mutex_lock(&pl_inode->mutex);
+
+                /* If the lock has already been released, no notification is
+                 * sent. We clear the notification time in this case. */
+                notify = !list_empty(&lock->domain_list);
+                if (!notify) {
+                        lock->contention_time.tv_sec = 0;
+                        lock->contention_time.tv_nsec = 0;
+                } else {
+                        lc.type = lock->type;
+                        lc.name = lock->basename;
+                        lc.pid = lock->client_pid;
+                        lc.domain = lock->volume;
+                        lc.xdata = NULL;
+
+                        gf_uuid_copy(up.gfid, lock->pinode->gfid);
+                        client = (client_t *)lock->client;
+                        if (client == NULL) {
+                                /* A NULL client can be found if the entrylk
+                                 * was issued by a server side xlator. */
+                                up.client_uid = NULL;
+                        } else {
+                                up.client_uid = client->client_uid;
+                        }
+                }
+
+                pthread_mutex_unlock(&pl_inode->mutex);
+
+                if (notify) {
+                        up.event_type = GF_UPCALL_ENTRYLK_CONTENTION;
+                        up.data = &lc;
+
+                        if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+                                gf_msg_debug(this->name, 0,
+                                             "Entrylk contention notification "
+                                             "failed");
+                        } else {
+                                gf_msg_debug(this->name, 0,
+                                             "Entrylk contention notification "
+                                             "sent");
+                        }
+                }
+
+                pthread_mutex_lock(&pl_inode->mutex);
+
+                list_del_init(&lock->contend);
+                __pl_entrylk_unref(lock);
+
+                pthread_mutex_unlock(&pl_inode->mutex);
+
+                inode_unref(pl_inode->inode);
+        }
+}
+
+
 /**
  * entrylk_grantable - is this lock grantable?
  * @inode: inode in which to look
@@ -208,19 +317,27 @@ out:
  * @type: type of lock
  */
 static pl_entry_lock_t *
-__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock)
+__entrylk_grantable (xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
+                     struct timespec *now, struct list_head *contend)
 {
         pl_entry_lock_t *tmp = NULL;
-
-        if (list_empty (&dom->entrylk_list))
-                return NULL;
+        pl_entry_lock_t *ret = NULL;
 
         list_for_each_entry (tmp, &dom->entrylk_list, domain_list) {
-                if (__conflicting_entrylks (tmp, lock))
-                        return tmp;
+                if (__conflicting_entrylks (tmp, lock)) {
+                        if (ret == NULL) {
+                                ret = tmp;
+                                if (contend == NULL) {
+                                        break;
+                                }
+                        }
+                        if (__entrylk_needs_contention_notify(this, tmp, now)) {
+                                list_add_tail(&tmp->contend, contend);
+                        }
+                }
         }
 
-        return NULL;
+        return ret;
 }
 
 static pl_entry_lock_t *
@@ -228,9 +345,6 @@ __blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock)
 {
         pl_entry_lock_t *tmp = NULL;
 
-        if (list_empty (&dom->blocked_entrylks))
-                return NULL;
-
         list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) {
                 if (names_conflict (tmp->basename, lock->basename))
                         return lock;
@@ -426,6 +540,27 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
         return NULL;
 }
 
+static int
+__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+                   pl_entry_lock_t *lock, int nonblock)
+{
+        struct timeval now;
+
+        gettimeofday(&now, NULL);
+
+        if (nonblock)
+                goto out;
+
+        lock->blkd_time = now;
+        list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
+
+        gf_msg_trace (this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
+                      pinode, lock->basename);
+
+out:
+        return -EAGAIN;
+}
+
 /**
  * __lock_entrylk - lock a name in a directory
  * @inode: inode for the directory in which to lock
@@ -439,24 +574,15 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
 
 int
 __lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
-		int nonblock, pl_dom_list_t *dom)
+                int nonblock, pl_dom_list_t *dom, struct timespec *now,
+                struct list_head *contend)
 {
         pl_entry_lock_t *conf = NULL;
         int              ret  = -EAGAIN;
 
-        conf = __entrylk_grantable (dom, lock);
+        conf = __entrylk_grantable (this, dom, lock, now, contend);
         if (conf) {
-                ret = -EAGAIN;
-                if (nonblock)
-                        goto out;
-
-                gettimeofday (&lock->blkd_time, NULL);
-                list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
-                gf_log (this->name, GF_LOG_TRACE,
-                        "Blocking lock: {pinode=%p, basename=%s}",
-                        pinode, lock->basename);
-
+                ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
                 goto out;
         }
 
@@ -471,20 +597,15 @@ __lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
          * granted, without which self-heal can't progress.
          * TODO: Find why 'owner_has_lock' is checked even for blocked locks.
          */
-        if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
-                ret = -EAGAIN;
-                if (nonblock)
-                        goto out;
-
-                gettimeofday (&lock->blkd_time, NULL);
-                list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "Lock is grantable, but blocking to prevent starvation");
-                gf_log (this->name, GF_LOG_TRACE,
-                        "Blocking lock: {pinode=%p, basename=%s}",
-                        pinode, lock->basename);
+        if (__blocked_entrylk_conflict (dom, lock) &&
+            !(__owner_has_lock (dom, lock))) {
+                if (nonblock == 0) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "Lock is grantable, but blocking to prevent "
+                                "starvation");
+                }
 
+                ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
                 goto out;
         }
 
@@ -551,7 +672,8 @@ out:
 
 void
 __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
-                             pl_dom_list_t *dom, struct list_head *granted)
+                             pl_dom_list_t *dom, struct list_head *granted,
+                             struct timespec *now, struct list_head *contend)
 {
         int              bl_ret = 0;
         pl_entry_lock_t *bl   = NULL;
@@ -566,7 +688,8 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
 
                 list_del_init (&bl->blocked_locks);
 
-                bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom);
+                bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom, now,
+                                         contend);
 
                 if (bl_ret == 0) {
                         list_add (&bl->blocked_locks, granted);
@@ -578,7 +701,8 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
 /* Grants locks if possible which are blocked on a lock */
 void
 grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
-			   pl_dom_list_t *dom)
+                           pl_dom_list_t *dom, struct timespec *now,
+                           struct list_head *contend)
 {
         struct list_head  granted_list;
         pl_entry_lock_t  *tmp = NULL;
@@ -589,7 +713,7 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
         pthread_mutex_lock (&pl_inode->mutex);
         {
                 __grant_blocked_entry_locks (this, pl_inode, dom,
-                                             &granted_list);
+                                             &granted_list, now, contend);
         }
         pthread_mutex_unlock (&pl_inode->mutex);
 
@@ -610,8 +734,6 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
 		}
 	}
         pthread_mutex_unlock (&pl_inode->mutex);
-
-        return;
 }
 
 
@@ -637,9 +759,18 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
 	int              nonblock         =  0;
         gf_boolean_t     need_inode_unref =  _gf_false;
         posix_locks_private_t  *priv = NULL;
+        struct list_head *pcontend = NULL;
+        struct list_head contend;
+        struct timespec  now = { };
 
         priv = this->private;
 
+        if (priv->notify_contention) {
+                pcontend = &contend;
+                INIT_LIST_HEAD(pcontend);
+                timespec_now(&now);
+        }
+
         if (xdata)
                 dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
 
@@ -722,7 +853,8 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
                 {
 			reqlock->pinode = pinode;
 
-                        ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom);
+                        ret = __lock_entrylk (this, pinode, reqlock, nonblock,
+                                              dom, &now, pcontend);
 			if (ret == 0) {
 				reqlock->frame = NULL;
 				op_ret = 0;
@@ -778,7 +910,7 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
 		if (ctx)
 			pthread_mutex_unlock (&ctx->lock);
 
-		grant_blocked_entry_locks (this, pinode, dom);
+		grant_blocked_entry_locks (this, pinode, dom, &now, pcontend);
 
                 break;
 
@@ -810,6 +942,10 @@ unwind:
                                      cmd, type);
         }
 
+        if (pcontend != NULL) {
+                entrylk_contention_notify(this, pcontend);
+        }
+
         return 0;
 }
 
@@ -868,27 +1004,37 @@ pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
 int
 pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
 {
+        posix_locks_private_t *priv;
         pl_entry_lock_t *tmp = NULL;
         pl_entry_lock_t *l = NULL;
-	pl_dom_list_t *dom = NULL;
+        pl_dom_list_t *dom = NULL;
         pl_inode_t *pinode = NULL;
-
+        struct list_head *pcontend = NULL;
         struct list_head released;
         struct list_head unwind;
+        struct list_head contend;
+        struct timespec now = { };
 
         INIT_LIST_HEAD (&released);
         INIT_LIST_HEAD (&unwind);
 
-	pthread_mutex_lock (&ctx->lock);
+        priv = this->private;
+        if (priv->notify_contention) {
+                pcontend = &contend;
+                INIT_LIST_HEAD (pcontend);
+                timespec_now(&now);
+        }
+
+        pthread_mutex_lock (&ctx->lock);
         {
                 list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
-					  client_list) {
-			pl_entrylk_log_cleanup (l);
+                                          client_list) {
+                        pl_entrylk_log_cleanup (l);
 
-			pinode = l->pinode;
+                        pinode = l->pinode;
 
-			pthread_mutex_lock (&pinode->mutex);
-			{
+                        pthread_mutex_lock (&pinode->mutex);
+                        {
                         /* If the entrylk object is part of granted list but not
                          * blocked list, then perform the following actions:
                          * i.   delete the object from granted list;
@@ -931,38 +1077,42 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
                                                        &unwind);
                                 }
                         }
-			pthread_mutex_unlock (&pinode->mutex);
+                        pthread_mutex_unlock (&pinode->mutex);
                 }
-	}
+        }
         pthread_mutex_unlock (&ctx->lock);
 
         list_for_each_entry_safe (l, tmp, &unwind, client_list) {
                 list_del_init (&l->client_list);
 
-		if (l->frame)
-			STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
-					     NULL);
+                if (l->frame)
+                        STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
+                                             NULL);
                 list_add_tail (&l->client_list, &released);
         }
 
         list_for_each_entry_safe (l, tmp, &released, client_list) {
                 list_del_init (&l->client_list);
 
-		pinode = l->pinode;
+                pinode = l->pinode;
 
-		dom = get_domain (pinode, l->volume);
+                dom = get_domain (pinode, l->volume);
 
-		grant_blocked_entry_locks (this, pinode, dom);
+                grant_blocked_entry_locks (this, pinode, dom, &now, pcontend);
 
-		pthread_mutex_lock (&pinode->mutex);
-		{
-			__pl_entrylk_unref (l);
-		}
-		pthread_mutex_unlock (&pinode->mutex);
+                pthread_mutex_lock (&pinode->mutex);
+                {
+                        __pl_entrylk_unref (l);
+                }
+                pthread_mutex_unlock (&pinode->mutex);
 
                 inode_unref (pinode->inode);
         }
 
+        if (pcontend != NULL) {
+                entrylk_contention_notify(this, pcontend);
+        }
+
         return 0;
 }
 
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 64ffb00c18c..890ac8b6d00 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -13,10 +13,12 @@
 #include "logging.h"
 #include "common-utils.h"
 #include "list.h"
+#include "upcall-utils.h"
 
 #include "locks.h"
 #include "clear.h"
 #include "common.h"
+#include "pl-messages.h"
 
 void
 __delete_inode_lock (pl_inode_lock_t *lock)
@@ -229,22 +231,134 @@ out:
         return revoke_lock;
 }
 
+static gf_boolean_t
+__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
+                                  struct timespec *now)
+{
+        posix_locks_private_t *priv;
+        int64_t elapsed;
+
+        priv = this->private;
+
+        /* If this lock is in a list, it means that we are about to send a
+         * notification for it, so no need to do anything else. */
+        if (!list_empty(&lock->contend)) {
+                return _gf_false;
+        }
+
+        elapsed = now->tv_sec;
+        elapsed -= lock->contention_time.tv_sec;
+        if (now->tv_nsec < lock->contention_time.tv_nsec) {
+                elapsed--;
+        }
+        if (elapsed < priv->notify_contention_delay) {
+                return _gf_false;
+        }
+
+        /* All contention notifications will be sent outside of the locked
+         * region. This means that currently granted locks might have already
+         * been unlocked by that time. To avoid the lock or the inode to be
+         * destroyed before we process them, we take an additional reference
+         * on both. */
+        inode_ref(lock->pl_inode->inode);
+        __pl_inodelk_ref(lock);
+
+        lock->contention_time = *now;
+
+        return _gf_true;
+}
+
+void
+inodelk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+        struct gf_upcall up;
+        struct gf_upcall_inodelk_contention lc;
+        pl_inode_lock_t *lock;
+        pl_inode_t *pl_inode;
+        client_t *client;
+        gf_boolean_t notify;
+
+        while (!list_empty(contend)) {
+                lock = list_first_entry(contend, pl_inode_lock_t, contend);
+
+                pl_inode = lock->pl_inode;
+
+                pthread_mutex_lock(&pl_inode->mutex);
+
+                /* If the lock has already been released, no notification is
+                 * sent. We clear the notification time in this case. */
+                notify = !list_empty(&lock->list);
+                if (!notify) {
+                        lock->contention_time.tv_sec = 0;
+                        lock->contention_time.tv_nsec = 0;
+                } else {
+                        memcpy(&lc.flock, &lock->user_flock, sizeof(lc.flock));
+                        lc.pid = lock->client_pid;
+                        lc.domain = lock->volume;
+                        lc.xdata = NULL;
+
+                        gf_uuid_copy(up.gfid, lock->pl_inode->gfid);
+                        client = (client_t *)lock->client;
+                        if (client == NULL) {
+                                /* A NULL client can be found if the inodelk
+                                 * was issued by a server side xlator. */
+                                up.client_uid = NULL;
+                        } else {
+                                up.client_uid = client->client_uid;
+                        }
+                }
+
+                pthread_mutex_unlock(&pl_inode->mutex);
+
+                if (notify) {
+                        up.event_type = GF_UPCALL_INODELK_CONTENTION;
+                        up.data = &lc;
+
+                        if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+                                gf_msg_debug(this->name, 0,
+                                             "Inodelk contention notification "
+                                             "failed");
+                        } else {
+                                gf_msg_debug(this->name, 0,
+                                             "Inodelk contention notification "
+                                             "sent");
+                        }
+                }
+
+                pthread_mutex_lock(&pl_inode->mutex);
+
+                list_del_init(&lock->contend);
+                __pl_inodelk_unref(lock);
+
+                pthread_mutex_unlock(&pl_inode->mutex);
+
+                inode_unref(pl_inode->inode);
+        }
+}
+
 /* Determine if lock is grantable or not */
 static pl_inode_lock_t *
-__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+__inodelk_grantable (xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+                     struct timespec *now, struct list_head *contend)
 {
         pl_inode_lock_t *l = NULL;
         pl_inode_lock_t *ret = NULL;
-        if (list_empty (&dom->inodelk_list))
-                goto out;
+
         list_for_each_entry (l, &dom->inodelk_list, list){
                 if (inodelk_conflict (lock, l) &&
                     !same_inodelk_owner (lock, l)) {
-                        ret = l;
-                        goto out;
+                        if (ret == NULL) {
+                                ret = l;
+                                if (contend == NULL) {
+                                        break;
+                                }
+                        }
+                        if (__inodelk_needs_contention_notify(this, l, now)) {
+                                list_add_tail(&l->contend, contend);
+                        }
                 }
         }
-out:
+
         return ret;
 }
 
@@ -252,20 +366,14 @@ static pl_inode_lock_t *
 __blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock)
 {
         pl_inode_lock_t *l   = NULL;
-        pl_inode_lock_t *ret = NULL;
-
-        if (list_empty (&dom->blocked_inodelks))
-                return NULL;
 
         list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
                 if (inodelk_conflict (lock, l)) {
-                        ret = l;
-                        goto out;
+                        return l;
                 }
         }
 
-out:
-        return ret;
+        return NULL;
 }
 
 static int
@@ -286,35 +394,45 @@ __owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
         return 0;
 }
 
+static int
+__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+                   int can_block)
+{
+        struct timeval now;
+
+        gettimeofday(&now, NULL);
+
+        if (can_block == 0) {
+                goto out;
+        }
+
+        lock->blkd_time = now;
+        list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+
+        gf_msg_trace (this->name, 0, "%s (pid=%d) (lk-owner=%s) %"PRId64" - "
+                                     "%"PRId64" => Blocked",
+                      lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+                      lock->client_pid, lkowner_utoa (&lock->owner),
+                      lock->user_flock.l_start, lock->user_flock.l_len);
+
+out:
+        return -EAGAIN;
+}
 
 /* Determines if lock can be granted and adds the lock. If the lock
  * is blocking, adds it to the blocked_inodelks list of the domain.
  */
 static int
 __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
-                int can_block,  pl_dom_list_t *dom)
+                int can_block,  pl_dom_list_t *dom, struct timespec *now,
+                struct list_head *contend)
 {
         pl_inode_lock_t *conf = NULL;
         int ret = -EINVAL;
 
-        conf = __inodelk_grantable (dom, lock);
+        conf = __inodelk_grantable (this, dom, lock, now, contend);
         if (conf) {
-                ret = -EAGAIN;
-                if (can_block == 0)
-                        goto out;
-
-                gettimeofday (&lock->blkd_time, NULL);
-                list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
-
-                gf_log (this->name, GF_LOG_TRACE,
-                        "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
-                        lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
-                        lock->client_pid,
-                        lkowner_utoa (&lock->owner),
-                        lock->user_flock.l_start,
-                        lock->user_flock.l_len);
-
-
+                ret = __lock_blocked_add(this, dom, lock, can_block);
                 goto out;
         }
 
@@ -330,25 +448,15 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
          * will not be unlocked by SHD from Machine1.
          * TODO: Find why 'owner_has_lock' is checked even for blocked locks.
          */
-        if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
-                ret = -EAGAIN;
-                if (can_block == 0)
-                        goto out;
-
-                gettimeofday (&lock->blkd_time, NULL);
-                list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
-
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "Lock is grantable, but blocking to prevent starvation");
-                gf_log (this->name, GF_LOG_TRACE,
-                        "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
-                        lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
-                        lock->client_pid,
-                        lkowner_utoa (&lock->owner),
-                        lock->user_flock.l_start,
-                        lock->user_flock.l_len);
-
+        if (__blocked_lock_conflict (dom, lock) &&
+            !(__owner_has_lock (dom, lock))) {
+                if (can_block != 0) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "Lock is grantable, but blocking to prevent "
+                                "starvation");
+                }
 
+                ret = __lock_blocked_add(this, dom, lock, can_block);
                 goto out;
         }
         __pl_inodelk_ref (lock);
@@ -417,7 +525,8 @@ out:
 
 static void
 __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
-                             struct list_head *granted, pl_dom_list_t *dom)
+                             struct list_head *granted, pl_dom_list_t *dom,
+                             struct timespec *now, struct list_head *contend)
 {
         int              bl_ret = 0;
         pl_inode_lock_t *bl = NULL;
@@ -432,7 +541,8 @@ __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
 
                 list_del_init (&bl->blocked_locks);
 
-                bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
+                bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom, now,
+                                         contend);
 
                 if (bl_ret == 0) {
                         list_add (&bl->blocked_locks, granted);
@@ -444,7 +554,8 @@ __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
 /* Grant all inodelks blocked on a lock */
 void
 grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
-                           pl_dom_list_t *dom)
+                           pl_dom_list_t *dom, struct timespec *now,
+                           struct list_head *contend)
 {
         struct list_head granted;
         pl_inode_lock_t *lock;
@@ -454,7 +565,8 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
 
         pthread_mutex_lock (&pl_inode->mutex);
         {
-                __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
+                __grant_blocked_inode_locks (this, pl_inode, &granted, dom,
+                                             now, contend);
         }
         pthread_mutex_unlock (&pl_inode->mutex);
 
@@ -471,7 +583,7 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
                               &lock->user_flock, 0, 0, lock->volume);
 
                 STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
-		lock->frame = NULL;
+                lock->frame = NULL;
         }
 
         pthread_mutex_lock (&pl_inode->mutex);
@@ -488,9 +600,9 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
 static void
 pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
 {
-	pl_inode_t *pl_inode = NULL;
+        pl_inode_t *pl_inode = NULL;
 
-	pl_inode = lock->pl_inode;
+        pl_inode = lock->pl_inode;
 
         gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by "
                 "{client=%p, pid=%"PRId64" lk-owner=%s}",
@@ -503,27 +615,38 @@ pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
 int
 pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
 {
+        posix_locks_private_t *priv;
         pl_inode_lock_t *tmp = NULL;
         pl_inode_lock_t *l = NULL;
-	pl_dom_list_t *dom = NULL;
+        pl_dom_list_t *dom = NULL;
         pl_inode_t *pl_inode = NULL;
-
+        struct list_head *pcontend = NULL;
         struct list_head released;
         struct list_head unwind;
+        struct list_head contend;
+        struct timespec now = { };
+
+        priv = this->private;
 
         INIT_LIST_HEAD (&released);
         INIT_LIST_HEAD (&unwind);
 
-	pthread_mutex_lock (&ctx->lock);
+        if (priv->notify_contention) {
+                pcontend = &contend;
+                INIT_LIST_HEAD (pcontend);
+                timespec_now(&now);
+        }
+
+        pthread_mutex_lock (&ctx->lock);
         {
                 list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,
-					  client_list) {
-			pl_inodelk_log_cleanup (l);
+                                          client_list) {
+                        pl_inodelk_log_cleanup (l);
 
-			pl_inode = l->pl_inode;
+                        pl_inode = l->pl_inode;
 
-			pthread_mutex_lock (&pl_inode->mutex);
-			{
+                        pthread_mutex_lock (&pl_inode->mutex);
+                        {
                         /* If the inodelk object is part of granted list but not
                          * blocked list, then perform the following actions:
                          * i.   delete the object from granted list;
@@ -567,45 +690,49 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
                                                        &unwind);
                                 }
                         }
-			pthread_mutex_unlock (&pl_inode->mutex);
+                        pthread_mutex_unlock (&pl_inode->mutex);
                 }
-	}
+        }
         pthread_mutex_unlock (&ctx->lock);
 
         list_for_each_entry_safe (l, tmp, &unwind, client_list) {
                 list_del_init (&l->client_list);
 
                 if (l->frame)
-			STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
-					     NULL);
+                        STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
+                                             NULL);
                 list_add_tail (&l->client_list, &released);
-
         }
 
         list_for_each_entry_safe (l, tmp, &released, client_list) {
                 list_del_init (&l->client_list);
 
-		pl_inode = l->pl_inode;
+                pl_inode = l->pl_inode;
 
-		dom = get_domain (pl_inode, l->volume);
+                dom = get_domain (pl_inode, l->volume);
 
-		grant_blocked_inode_locks (this, pl_inode, dom);
+                grant_blocked_inode_locks (this, pl_inode, dom, &now,
+                                           pcontend);
 
-		pthread_mutex_lock (&pl_inode->mutex);
-		{
-			__pl_inodelk_unref (l);
-		}
-		pthread_mutex_unlock (&pl_inode->mutex);
+                pthread_mutex_lock (&pl_inode->mutex);
+                {
+                        __pl_inodelk_unref (l);
+                }
+                pthread_mutex_unlock (&pl_inode->mutex);
                 inode_unref (pl_inode->inode);
         }
 
+        if (pcontend != NULL) {
+                inodelk_contention_notify(this, pcontend);
+        }
+
         return 0;
 }
 
 
 static int
 pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
-		pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
+                pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
                 inode_t *inode)
 {
         posix_locks_private_t  *priv = NULL;
@@ -613,9 +740,12 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
         pl_inode_lock_t  *retlock          =  NULL;
         gf_boolean_t      unref            =  _gf_true;
         gf_boolean_t      need_inode_unref =  _gf_false;
+        struct list_head *pcontend = NULL;
+        struct list_head  contend;
+        struct timespec   now = { };
         short             fl_type;
 
-	lock->pl_inode = pl_inode;
+        lock->pl_inode = pl_inode;
         fl_type = lock->fl_type;
 
         priv = this->private;
@@ -657,12 +787,19 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
                 }
         }
 
+        if (priv->notify_contention) {
+                pcontend = &contend;
+                INIT_LIST_HEAD(pcontend);
+                timespec_now(&now);
+        }
+
 	if (ctx)
 		pthread_mutex_lock (&ctx->lock);
         pthread_mutex_lock (&pl_inode->mutex);
         {
                 if (lock->fl_type != F_UNLCK) {
-                        ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
+                        ret = __lock_inodelk (this, pl_inode, lock, can_block,
+                                              dom, &now, pcontend);
                         if (ret == 0) {
 				lock->frame = NULL;
                                 gf_log (this->name, GF_LOG_TRACE,
@@ -725,13 +862,18 @@ out:
          */
         if ((fl_type == F_UNLCK) && (ret == 0)) {
                 inode_unref (pl_inode->inode);
-                grant_blocked_inode_locks (this, pl_inode, dom);
+                grant_blocked_inode_locks (this, pl_inode, dom, &now,
+                                           pcontend);
         }
 
         if (need_inode_unref) {
                 inode_unref (pl_inode->inode);
         }
 
+        if (pcontend != NULL) {
+                inodelk_contention_notify(this, pcontend);
+        }
+
         return ret;
 }
 
@@ -771,7 +913,8 @@ new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
 
         INIT_LIST_HEAD (&lock->list);
         INIT_LIST_HEAD (&lock->blocked_locks);
-	INIT_LIST_HEAD (&lock->client_list);
+        INIT_LIST_HEAD (&lock->client_list);
+        INIT_LIST_HEAD (&lock->contend);
         __pl_inodelk_ref (lock);
 
         return lock;
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 3d3b327f56c..c2edfff8f00 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -70,6 +70,7 @@ typedef struct __posix_lock posix_lock_t;
 struct __pl_inode_lock {
         struct list_head   list;
         struct list_head   blocked_locks; /* list_head pointing to blocked_inodelks */
+        struct list_head   contend; /* list of contending locks */
         int                ref;
 
         short              fl_type;
@@ -86,6 +87,8 @@ struct __pl_inode_lock {
 
         struct timeval     blkd_time;   /*time at which lock was queued into blkd list*/
         struct timeval     granted_time; /*time at which lock was queued into active list*/
+        /*last time at wich lock contention was detected and notified*/
+        struct timespec    contention_time;
 
         /* These two together serve to uniquely identify each process
            across nodes */
@@ -120,6 +123,7 @@ typedef struct _pl_dom_list pl_dom_list_t;
 struct __entry_lock {
         struct list_head  domain_list;    /* list_head back to pl_dom_list_t */
         struct list_head  blocked_locks; /* list_head back to blocked_entrylks */
+        struct list_head  contend;       /* list of contending locks */
 	int ref;
 
         call_frame_t     *frame;
@@ -133,6 +137,8 @@ struct __entry_lock {
 
         struct timeval     blkd_time;   /*time at which lock was queued into blkd list*/
         struct timeval     granted_time; /*time at which lock was queued into active list*/
+        /*last time at wich lock contention was detected and notified*/
+        struct timespec    contention_time;
 
         void             *client;
         gf_lkowner_t      owner;
@@ -194,6 +200,8 @@ typedef struct {
         uint32_t        revocation_secs;
         gf_boolean_t    revocation_clear_all;
         uint32_t        revocation_max_blocked;
+        gf_boolean_t    notify_contention;
+        uint32_t        notify_contention_delay;
 } posix_locks_private_t;
 
 
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
index 7a1e3f488e7..e5a276f35b5 100644
--- a/xlators/features/locks/src/pl-messages.h
+++ b/xlators/features/locks/src/pl-messages.h
@@ -24,7 +24,9 @@
  */
 
 GLFS_MSGID(PL,
-        PL_MSG_LOCK_NUMBER
+        PL_MSG_LOCK_NUMBER,
+        PL_MSG_INODELK_CONTENTION_FAILED,
+        PL_MSG_ENTRYLK_CONTENTION_FAILED
 );
 
 #endif /* !_PL_MESSAGES_H_ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 78bf160058c..82d77db1164 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -3641,6 +3641,13 @@ reconfigure (xlator_t *this, dict_t *options)
         GF_OPTION_RECONF ("revocation-max-blocked",
                           priv->revocation_max_blocked, options,
                           uint32, out);
+
+        GF_OPTION_RECONF ("notify-contention", priv->notify_contention,
+                          options, bool, out);
+
+        GF_OPTION_RECONF ("notify-contention-delay",
+                          priv->notify_contention_delay, options, uint32, out);
+
         ret = 0;
 
 out:
@@ -3705,6 +3712,12 @@ init (xlator_t *this)
         GF_OPTION_INIT ("revocation-max-blocked", priv->revocation_max_blocked,
                         uint32, out);
 
+        GF_OPTION_INIT ("notify-contention", priv->notify_contention, bool,
+                        out);
+
+        GF_OPTION_INIT ("notify-contention-delay",
+                        priv->notify_contention_delay, uint32, out);
+
         this->local_pool = mem_pool_new (pl_local_t, 32);
         if (!this->local_pool) {
                 ret = -1;
@@ -4461,5 +4474,32 @@ struct volume_options options[] = {
                          "will be revoked to allow the others to proceed.  Can "
                          "be used in conjunction w/ revocation-clear-all."
         },
+        { .key = {"notify-contention"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "no",
+          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+          .op_version = { GD_OP_VERSION_4_0_0 },
+          .tags = { "locks", "contention" },
+          .description = "When this option is enabled and a lock request "
+                         "conflicts with a currently granted lock, an upcall "
+                         "notification will be sent to the current owner of "
+                         "the lock to request it to be released as soon as "
+                         "possible."
+        },
+        { .key = {"notify-contention-delay"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 0, /* An upcall notification is sent every time a conflict is
+                     * detected. */
+          .max = 60,
+          .default_value = "5",
+          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+          .op_version = { GD_OP_VERSION_4_0_0 },
+          .tags = { "locks", "contention", "timeout" },
+          .description = "This value determines the minimum amount of time "
+                         "(in seconds) between upcall contention notifications "
+                         "on the same inode. If multiple lock requests are "
+                         "received during this period, only one upcall will "
+                         "be sent."
+        },
         { .key = {NULL} },
 };
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index da85cb5297f..e0ec3368ca7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1484,6 +1484,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version = GD_OP_VERSION_3_13_0,
           .flags      = VOLOPT_FLAG_CLIENT_OPT
         },
+        { .key        = "disperse.eager-lock-timeout",
+          .voltype    = "cluster/disperse",
+          .op_version = GD_OP_VERSION_4_0_0,
+          .flags      = VOLOPT_FLAG_CLIENT_OPT
+        },
+        { .key        = "disperse.other-eager-lock-timeout",
+          .voltype    = "cluster/disperse",
+          .op_version = GD_OP_VERSION_4_0_0,
+          .flags      = VOLOPT_FLAG_CLIENT_OPT
+        },
         { .key        = "cluster.quorum-type",
           .voltype    = "cluster/replicate",
           .option     = "quorum-type",
@@ -3489,6 +3499,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version  = GD_OP_VERSION_3_9_0,
           .type        = NO_DOC,
         },
+        { .option      = "notify-contention",
+          .key         = "features.locks-notify-contention",
+          .voltype     = "features/locks",
+          .op_version  = GD_OP_VERSION_4_0_0,
+        },
+        { .option      = "notify-contention-delay",
+          .key         = "features.locks-notify-contention-delay",
+          .voltype     = "features/locks",
+          .op_version  = GD_OP_VERSION_4_0_0,
+        },
         { .key        = "disperse.shd-max-threads",
           .voltype    = "cluster/disperse",
           .op_version = GD_OP_VERSION_3_9_0,
diff --git a/xlators/protocol/client/src/client-callback.c b/xlators/protocol/client/src/client-callback.c
index 51164e57230..b2f9a225887 100644
--- a/xlators/protocol/client/src/client-callback.c
+++ b/xlators/protocol/client/src/client-callback.c
@@ -173,6 +173,101 @@ out:
         return 0;
 }
 
+int
+client_cbk_inodelk_contention (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        int                                  ret         = -1;
+        struct iovec                        *iov         = NULL;
+        struct gf_upcall                     upcall_data = {0,};
+        struct gf_upcall_inodelk_contention  lc          = {{0,},};
+        gfs4_inodelk_contention_req          proto_lc    = {{0,},};
+
+        GF_VALIDATE_OR_GOTO ("client-callback", rpc, out);
+        GF_VALIDATE_OR_GOTO ("client-callback", mydata, out);
+        GF_VALIDATE_OR_GOTO ("client-callback", data, out);
+
+        iov = (struct iovec *)data;
+        ret = xdr_to_generic (*iov, &proto_lc,
+                              (xdrproc_t)xdr_gfs4_inodelk_contention_req);
+
+        if (ret < 0) {
+                gf_msg (THIS->name, GF_LOG_WARNING, -ret,
+                        PC_MSG_INODELK_CONTENTION_FAIL,
+                        "XDR decode of inodelk contention failed.");
+                goto out;
+        }
+
+        upcall_data.data = &lc;
+        ret = gf_proto_inodelk_contention_to_upcall (&proto_lc, &upcall_data);
+        if (ret < 0)
+                goto out;
+
+        upcall_data.event_type = GF_UPCALL_INODELK_CONTENTION;
+
+        default_notify (THIS, GF_EVENT_UPCALL, &upcall_data);
+
+out:
+        if (proto_lc.domain)
+                free (proto_lc.domain);
+
+        if (proto_lc.xdata.xdata_val)
+                free (proto_lc.xdata.xdata_val);
+
+        if (lc.xdata)
+                dict_unref (lc.xdata);
+
+        return ret;
+}
+
+int
+client_cbk_entrylk_contention (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        int                                  ret         = -1;
+        struct iovec                        *iov         = NULL;
+        struct gf_upcall                     upcall_data = {0,};
+        struct gf_upcall_entrylk_contention  lc          = {0,};
+        gfs4_entrylk_contention_req          proto_lc    = {{0,},};
+
+        GF_VALIDATE_OR_GOTO ("client-callback", rpc, out);
+        GF_VALIDATE_OR_GOTO ("client-callback", mydata, out);
+        GF_VALIDATE_OR_GOTO ("client-callback", data, out);
+
+        iov = (struct iovec *)data;
+        ret = xdr_to_generic (*iov, &proto_lc,
+                              (xdrproc_t)xdr_gfs4_entrylk_contention_req);
+
+        if (ret < 0) {
+                gf_msg (THIS->name, GF_LOG_WARNING, -ret,
+                        PC_MSG_ENTRYLK_CONTENTION_FAIL,
+                        "XDR decode of entrylk contention failed.");
+                goto out;
+        }
+
+        upcall_data.data = &lc;
+        ret = gf_proto_entrylk_contention_to_upcall (&proto_lc, &upcall_data);
+        if (ret < 0)
+                goto out;
+
+        upcall_data.event_type = GF_UPCALL_ENTRYLK_CONTENTION;
+
+        default_notify (THIS, GF_EVENT_UPCALL, &upcall_data);
+
+out:
+        if (proto_lc.name)
+                free (proto_lc.name);
+
+        if (proto_lc.domain)
+                free (proto_lc.domain);
+
+        if (proto_lc.xdata.xdata_val)
+                free (proto_lc.xdata.xdata_val);
+
+        if (lc.xdata)
+                dict_unref (lc.xdata);
+
+        return ret;
+}
+
 rpcclnt_cb_actor_t gluster_cbk_actors[GF_CBK_MAXVALUE] = {
         [GF_CBK_NULL]               = {"NULL",               GF_CBK_NULL,               client_cbk_null },
         [GF_CBK_FETCHSPEC]          = {"FETCHSPEC",          GF_CBK_FETCHSPEC,          client_cbk_fetchspec },
@@ -181,6 +276,8 @@ rpcclnt_cb_actor_t gluster_cbk_actors[GF_CBK_MAXVALUE] = {
         [GF_CBK_CHILD_UP]           = {"CHILD_UP",           GF_CBK_CHILD_UP,           client_cbk_child_up },
         [GF_CBK_CHILD_DOWN]         = {"CHILD_DOWN",         GF_CBK_CHILD_DOWN,         client_cbk_child_down },
         [GF_CBK_RECALL_LEASE]       = {"RECALL_LEASE",       GF_CBK_RECALL_LEASE,       client_cbk_recall_lease },
+        [GF_CBK_INODELK_CONTENTION] = {"INODELK_CONTENTION", GF_CBK_INODELK_CONTENTION, client_cbk_inodelk_contention },
+        [GF_CBK_ENTRYLK_CONTENTION] = {"ENTRYLK_CONTENTION", GF_CBK_ENTRYLK_CONTENTION, client_cbk_entrylk_contention },
 };
 
 
diff --git a/xlators/protocol/client/src/client-messages.h b/xlators/protocol/client/src/client-messages.h
index 86b721bd593..5f146c67efe 100644
--- a/xlators/protocol/client/src/client-messages.h
+++ b/xlators/protocol/client/src/client-messages.h
@@ -89,7 +89,9 @@ GLFS_MSGID(PC,
         PC_MSG_CACHE_INVALIDATION_FAIL,
         PC_MSG_CHILD_STATUS,
         PC_MSG_GFID_NULL,
-        PC_MSG_RECALL_LEASE_FAIL
+        PC_MSG_RECALL_LEASE_FAIL,
+        PC_MSG_INODELK_CONTENTION_FAIL,
+        PC_MSG_ENTRYLK_CONTENTION_FAIL
 );
 
 #endif /* !_PC_MESSAGES_H__ */
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 7154355e690..66122318c79 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -1349,6 +1349,8 @@ server_process_event_upcall (xlator_t *this, void *data)
         enum gf_cbk_procnum             cbk_procnum     = GF_CBK_NULL;
         gfs3_cbk_cache_invalidation_req gf_c_req        = {0,};
         gfs3_recall_lease_req           gf_recall_lease = {{0,},};
+        gfs4_inodelk_contention_req     gf_inodelk_contention = {{0},};
+        gfs4_entrylk_contention_req     gf_entrylk_contention = {{0},};
         xdrproc_t                       xdrproc;
 
         GF_VALIDATE_OR_GOTO(this->name, data, out);
@@ -1358,7 +1360,16 @@ server_process_event_upcall (xlator_t *this, void *data)
 
         upcall_data = (struct gf_upcall *)data;
         client_uid = upcall_data->client_uid;
-        GF_VALIDATE_OR_GOTO(this->name, client_uid, out);
+        /* client_uid could be NULL if the upcall was intended for a server's
+         * child xlator (so no client_uid available) but it hasn't handled
+         * the notification. For this reason we silently ignore any upcall
+         * request with a NULL client_uid, but -1 will be returned.
+         */
+        if (client_uid == NULL) {
+                gf_msg_debug(this->name, 0,
+                             "NULL client_uid for an upcall request");
+                goto out;
+        }
 
         switch (upcall_data->event_type) {
         case GF_UPCALL_CACHE_INVALIDATION:
@@ -1381,6 +1392,28 @@ server_process_event_upcall (xlator_t *this, void *data)
                 cbk_procnum = GF_CBK_RECALL_LEASE;
                 xdrproc = (xdrproc_t)xdr_gfs3_recall_lease_req;
                 break;
+        case GF_UPCALL_INODELK_CONTENTION:
+                ret = gf_proto_inodelk_contention_from_upcall (this,
+                                                        &gf_inodelk_contention,
+                                                         upcall_data);
+                if (ret < 0)
+                        goto out;
+
+                up_req = &gf_inodelk_contention;
+                cbk_procnum = GF_CBK_INODELK_CONTENTION;
+                xdrproc = (xdrproc_t)xdr_gfs4_inodelk_contention_req;
+                break;
+        case GF_UPCALL_ENTRYLK_CONTENTION:
+                ret = gf_proto_entrylk_contention_from_upcall (this,
+                                                        &gf_entrylk_contention,
+                                                         upcall_data);
+                if (ret < 0)
+                        goto out;
+
+                up_req = &gf_entrylk_contention;
+                cbk_procnum = GF_CBK_ENTRYLK_CONTENTION;
+                xdrproc = (xdrproc_t)xdr_gfs4_entrylk_contention_req;
+                break;
         default:
                 gf_msg (this->name, GF_LOG_WARNING, EINVAL,
                         PS_MSG_INVALID_ENTRY,
@@ -1417,11 +1450,10 @@ server_process_event_upcall (xlator_t *this, void *data)
         pthread_mutex_unlock (&conf->mutex);
         ret = 0;
 out:
-        if ((gf_c_req.xdata).xdata_val)
-                GF_FREE ((gf_c_req.xdata).xdata_val);
-
-        if ((gf_recall_lease.xdata).xdata_val)
-                GF_FREE ((gf_recall_lease.xdata).xdata_val);
+        GF_FREE ((gf_c_req.xdata).xdata_val);
+        GF_FREE ((gf_recall_lease.xdata).xdata_val);
+        GF_FREE ((gf_inodelk_contention.xdata).xdata_val);
+        GF_FREE ((gf_entrylk_contention.xdata).xdata_val);
 
         return ret;
 }