From 500a656c91558dd7913f572369f20b8550e9e98d Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Sat, 19 Apr 2014 20:03:38 +0530 Subject: features/locks: Remove stale inodelk objects from 'blocked_locks' list * In the event of a DISCONNECT from a client, as part of cleanup, inodelk objects are not removed from the blocked_locks list before being unref'd and freed, causing the brick process to crash at some point when the (now) stale object is accessed again in the list. * Also during cleanup, it is pointless to try and grant lock to a previously blocked inodelk (say L1) as part of releasing another conflicting lock (L2), (which is a side-effect of L1 not being deleted from blocked_locks list before grant_blocked_inode_locks() in cleanup) if L1 is also associated with the DISCONNECTing client. This patch fixes the problem. * Also, the codepath in cleanup of entrylks seems to be granting blocked inodelks, when it should be attempting to grant blocked entrylks, which is fixed in this patch. Change-Id: I8493365c33020333b3f61aa15f505e4e7e6a9891 BUG: 1089470 Signed-off-by: Krutika Dhananjay Reviewed-on: http://review.gluster.org/7512 Reviewed-by: Raghavendra G Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System Reviewed-by: Krishnan Parthasarathi Reviewed-by: Anand Avati --- xlators/features/locks/src/entrylk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'xlators/features/locks/src/entrylk.c') diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index dc86512be..4f5d8aca2 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -749,7 +749,7 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) dom = get_domain (pinode, l->volume); - grant_blocked_inode_locks (this, pinode, dom); + grant_blocked_entry_locks (this, pinode, dom); pthread_mutex_lock (&pinode->mutex); { -- cgit From 6a188c6b2c95d16c1bb6391c9fcb8ef808c2141b Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Thu, 24 Apr 2014 16:37:05 +0530 Subject: features/locks: Remove stale entrylk objects from 'blocked_locks' list * In the event of a DISCONNECT from a client, as part of cleanup, entrylk objects are not removed from the blocked_locks list before being unref'd and freed, causing the brick process to crash at some point when the (now) stale object is accessed again in the list. * Also during cleanup, it is pointless to try and grant lock to a previously blocked entrylk (say L1) as part of releasing another conflicting lock (L2), (which is a side-effect of L1 not being deleted from blocked_locks list before grant_blocked_entry_locks() in cleanup) if L1 is also associated with the DISCONNECTing client. This patch fixes the problem. Change-Id: I3d684c6bafc7e6db89ba68f0a2ed1dcb333791c6 BUG: 1089470 Signed-off-by: Krutika Dhananjay Reviewed-on: http://review.gluster.org/7560 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/features/locks/src/entrylk.c | 50 +++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) (limited to 'xlators/features/locks/src/entrylk.c') diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index 4f5d8aca2..8496d9d8d 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -715,15 +715,16 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) pl_inode_t *pinode = NULL; struct list_head released; + struct list_head unwind; INIT_LIST_HEAD (&released); + INIT_LIST_HEAD (&unwind); pthread_mutex_lock (&ctx->lock); { list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers, client_list) { list_del_init (&l->client_list); - list_add_tail (&l->client_list, &released); pl_entrylk_log_cleanup (l); @@ -731,19 +732,62 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) pthread_mutex_lock (&pinode->mutex); { - list_del_init (&l->domain_list); + /* If the entrylk object is part of granted list but not + * blocked list, then perform the following actions: + * i. delete the object from granted list; + * ii. grant other locks (from other clients) that may + * have been blocked on this entrylk; and + * iii. unref the object. + * + * If the entrylk object (L1) is part of both granted + * and blocked lists, then this means that a parallel + * unlock on another entrylk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in + * __grant_blocked_entry_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked entrylks from other clients, now that L1 is + * out of their way and then unref L1 in the end, and + * leave it to the other thread (the one executing + * unlock codepath) to unwind L1's frame, delete it from + * blocked_locks list, and perform the last unref on L1. + * + * If the entrylk object (L1) is part of blocked list + * only, the cleanup code path must: + * i. delete it from the blocked_locks list inside + * this critical section, + * ii. unwind its frame with EAGAIN, + * iii. try and grant blocked entry locks from other + * clients that were otherwise grantable, but were + * blocked to avoid leaving L1 to starve forever. + * iv. unref the object. + */ + if (!list_empty (&l->domain_list)) { + list_del_init (&l->domain_list); + list_add_tail (&l->client_list, + &released); + } else { + list_del_init (&l->blocked_locks); + list_add_tail (&l->client_list, + &unwind); + } } pthread_mutex_unlock (&pinode->mutex); } } pthread_mutex_unlock (&ctx->lock); - list_for_each_entry_safe (l, tmp, &released, client_list) { + list_for_each_entry_safe (l, tmp, &unwind, client_list) { list_del_init (&l->client_list); if (l->frame) STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN, NULL); + list_add_tail (&l->client_list, &released); + } + + list_for_each_entry_safe (l, tmp, &released, client_list) { + list_del_init (&l->client_list); pinode = l->pinode; -- cgit