diff options
-rw-r--r-- | tests/features/lock_revocation.t | 52 | ||||
-rw-r--r-- | xlators/features/locks/src/clear.c | 4 | ||||
-rw-r--r-- | xlators/features/locks/src/common.c | 13 | ||||
-rw-r--r-- | xlators/features/locks/src/common.h | 3 | ||||
-rw-r--r-- | xlators/features/locks/src/entrylk.c | 124 | ||||
-rw-r--r-- | xlators/features/locks/src/inodelk.c | 125 | ||||
-rw-r--r-- | xlators/features/locks/src/locks.h | 4 | ||||
-rw-r--r-- | xlators/features/locks/src/posix.c | 56 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 21 |
9 files changed, 394 insertions, 8 deletions
diff --git a/tests/features/lock_revocation.t b/tests/features/lock_revocation.t new file mode 100644 index 00000000000..cbf21b71650 --- /dev/null +++ b/tests/features/lock_revocation.t @@ -0,0 +1,52 @@ +#!/bin/bash +logdir=$(gluster --print-logdir) +BRICK_LOGFILES="$logdir/bricks/d-backends-brick?.log" +rm -f $BRICK_LOGFILES &> /dev/null + +# Test that lock revocation works + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +cleanup; + +function deadlock_fop() { + local MNT=$1 + for i in {1..1000}; do + dd if=/dev/zero of=$MNT/testfile bs=1k count=10 &> /dev/null + if grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null; then + break + fi + done +} + +function monkey_unlock() { + grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null && echo SUCCESS + return 0 +} + +function append_to_file() { + local FILE_PATH=$1 + echo "hello" >> $FILE_PATH + return 0 +} + +#Init +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 features.locks-monkey-unlocking on +TEST $CLI volume set $V0 features.locks-revocation-secs 2 +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 -s $H0 $M0; +TEST $GFS --volfile-id=$V0 -s $H0 $M1; + +# Deadlock writes to a file using monkey unlocking +deadlock_fop $M0 & +EXPECT_WITHIN 60 "SUCCESS" monkey_unlock + +# Sleep > unlock timeout and attempt to write to the file +sleep 3 +TEST append_to_file $M1/testfile + +cleanup diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c index 640c6bb5553..d7c210f24a5 100644 --- a/xlators/features/locks/src/clear.c +++ b/xlators/features/locks/src/clear.c @@ -234,6 +234,7 @@ blkd: continue; bcount++; + list_del_init (&ilock->client_list); list_del_init (&ilock->blocked_locks); list_add (&ilock->blocked_locks, &released); } @@ -268,6 +269,7 @@ granted: continue; gcount++; + list_del_init (&ilock->client_list); list_del_init (&ilock->list); list_add (&ilock->list, &released); } @@ -321,6 +323,7 @@ blkd: bcount++; + list_del_init (&elock->client_list); list_del_init (&elock->blocked_locks); list_add_tail (&elock->blocked_locks, &released); } @@ -355,6 +358,7 @@ granted: } gcount++; + list_del_init (&elock->client_list); list_del_init (&elock->domain_list); list_add_tail (&elock->domain_list, &removed); diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 123c1101e94..796b538f6f2 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -1121,3 +1121,16 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock) return conf; } + +gf_boolean_t +pl_does_monkey_want_stuck_lock() +{ + long int monkey_unlock_rand = 0; + long int monkey_unlock_rand_rem = 0; + + monkey_unlock_rand = random (); + monkey_unlock_rand_rem = monkey_unlock_rand % 100; + if (monkey_unlock_rand_rem == 0) + return _gf_true; + return _gf_false; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 5486f9b8314..3729ca24bed 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -161,4 +161,7 @@ pl_metalock_is_active (pl_inode_t *pl_inode); int __pl_queue_lock (pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block); + +gf_boolean_t +pl_does_monkey_want_stuck_lock(); #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index 783c57e6381..4231d760cdc 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -16,9 +16,9 @@ #include "list.h" #include "locks.h" +#include "clear.h" #include "common.h" - void __pl_entrylk_unref (pl_entry_lock_t *lock) { @@ -111,6 +111,97 @@ __conflicting_entrylks (pl_entry_lock_t *l1, pl_entry_lock_t *l2) return 0; } +/* See comments in inodelk.c for details */ +static inline gf_boolean_t +__stale_entrylk (xlator_t *this, pl_entry_lock_t *candidate_lock, + pl_entry_lock_t *requested_lock, time_t *lock_age_sec) +{ + posix_locks_private_t *priv = NULL; + struct timeval curr; + gettimeofday (&curr, NULL); + + priv = this->private; + + /* Question: Should we just prune them all given the + * chance? Or just the locks we are attempting to acquire? + */ + if (names_conflict (candidate_lock->basename, + requested_lock->basename)) { + *lock_age_sec = curr.tv_sec - + candidate_lock->granted_time.tv_sec; + if (*lock_age_sec > priv->revocation_secs) + return _gf_true; + } + return _gf_false; +} + +/* See comments in inodelk.c for details */ +static gf_boolean_t +__entrylk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + pl_entry_lock_t *lock) +{ + posix_locks_private_t *priv = NULL; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lk = NULL; + gf_boolean_t revoke_lock = _gf_false; + int bcount = 0; + int gcount = 0; + int op_errno = 0; + clrlk_args args; + args.opts = NULL; + time_t lk_age_sec = 0; + uint32_t max_blocked = 0; + char *reason_str = NULL; + + priv = this->private; + args.type = CLRLK_ENTRY; + if (priv->revocation_clear_all == _gf_true) + args.kind = CLRLK_ALL; + else + args.kind = CLRLK_GRANTED; + + + if (list_empty (&dom->entrylk_list)) + goto out; + + pthread_mutex_lock (&pinode->mutex); + lock->pinode = pinode; + list_for_each_entry_safe (lk, tmp, &dom->entrylk_list, domain_list) { + if (__stale_entrylk (this, lk, lock, &lk_age_sec) == _gf_true) { + revoke_lock = _gf_true; + reason_str = "age"; + break; + } + } + max_blocked = priv->revocation_max_blocked; + if (max_blocked != 0 && revoke_lock == _gf_false) { + list_for_each_entry_safe (lk, tmp, &dom->blocked_entrylks, + blocked_locks) { + max_blocked--; + if (max_blocked == 0) { + revoke_lock = _gf_true; + reason_str = "max blocked"; + break; + } + } + } + pthread_mutex_unlock (&pinode->mutex); + +out: + if (revoke_lock == _gf_true) { + clrlk_clear_entrylk (this, pinode, dom, &args, &bcount, &gcount, + &op_errno); + gf_log (this->name, GF_LOG_WARNING, + "Lock revocation [reason: %s; gfid: %s; domain: %s; " + "age: %ld sec] - Entry lock revoked: %d granted & %d " + "blocked locks cleared", reason_str, + uuid_utoa (pinode->gfid), dom->domain, lk_age_sec, + gcount, bcount); + } + + return revoke_lock; +} + /** * entrylk_grantable - is this lock grantable? * @inode: inode in which to look @@ -546,6 +637,9 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, pl_ctx_t *ctx = NULL; int nonblock = 0; gf_boolean_t need_inode_unref = _gf_false; + posix_locks_private_t *priv = NULL; + + priv = this->private; if (xdata) dict_ret = dict_get_str (xdata, "connection-id", &conn_id); @@ -599,6 +693,24 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, * current stack unwinds. */ pinode->inode = inode_ref (inode); + if (priv->revocation_secs != 0) { + if (cmd != ENTRYLK_UNLOCK) { + __entrylk_prune_stale (this, pinode, dom, reqlock); + } else if (priv->monkey_unlocking == _gf_true) { + if (pl_does_monkey_want_stuck_lock ()) { + gf_log (this->name, GF_LOG_WARNING, + "MONKEY LOCKING (forcing stuck lock)!"); + op_ret = 0; + need_inode_unref = _gf_true; + pthread_mutex_lock (&pinode->mutex); + { + __pl_entrylk_unref (reqlock); + } + pthread_mutex_unlock (&pinode->mutex); + goto out; + } + } + } switch (cmd) { case ENTRYLK_LOCK_NB: @@ -678,9 +790,6 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, "a bug report at http://bugs.gluster.com", cmd); goto out; } - if (need_inode_unref) - inode_unref (pinode->inode); - /* The following (extra) unref corresponds to the ref that * was done at the time the lock was granted. */ @@ -689,6 +798,9 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, out: + if (need_inode_unref) + inode_unref (pinode->inode); + if (unwind) { entrylk_trace_out (this, frame, volume, fd, loc, basename, cmd, type, op_ret, op_errno); @@ -772,8 +884,6 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) { list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers, client_list) { - list_del_init (&l->client_list); - pl_entrylk_log_cleanup (l); pinode = l->pinode; @@ -810,6 +920,8 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) * blocked to avoid leaving L1 to starve forever. * iv. unref the object. */ + list_del_init (&l->client_list); + if (!list_empty (&l->domain_list)) { list_del_init (&l->domain_list); list_add_tail (&l->client_list, diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index 1564f26b8fb..e1702c78ba1 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -16,6 +16,7 @@ #include "list.h" #include "locks.h" +#include "clear.h" #include "common.h" void @@ -130,6 +131,105 @@ inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) inodelk_type_conflict (l1, l2)); } +/* + * Check to see if the candidate lock overlaps/conflicts with the + * requested lock. If so, determine how old the lock is and return + * true if it exceeds the configured threshold, false otherwise. + */ +static inline gf_boolean_t +__stale_inodelk (xlator_t *this, pl_inode_lock_t *candidate_lock, + pl_inode_lock_t *requested_lock, time_t *lock_age_sec) +{ + posix_locks_private_t *priv = NULL; + struct timeval curr; + + priv = this->private; + gettimeofday (&curr, NULL); + /* Question: Should we just prune them all given the + * chance? Or just the locks we are attempting to acquire? + */ + if (inodelk_conflict (candidate_lock, requested_lock)) { + *lock_age_sec = curr.tv_sec - + candidate_lock->granted_time.tv_sec; + if (*lock_age_sec > priv->revocation_secs) + return _gf_true; + } + return _gf_false; +} + +/* Examine any locks held on this inode and potentially revoke the lock + * if the age exceeds revocation_secs. We will clear _only_ those locks + * which are granted, and then grant those locks which are blocked. + * + * Depending on how this patch works in the wild, we may expand this and + * introduce a heuristic which clears blocked locks as well if they + * are beyond a threshold. + */ +static gf_boolean_t +__inodelk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + pl_inode_lock_t *lock) +{ + posix_locks_private_t *priv = NULL; + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *lk = NULL; + gf_boolean_t revoke_lock = _gf_false; + int bcount = 0; + int gcount = 0; + int op_errno = 0; + clrlk_args args; + args.opts = NULL; + time_t lk_age_sec = 0; + uint32_t max_blocked = 0; + char *reason_str = NULL; + + priv = this->private; + + args.type = CLRLK_INODE; + if (priv->revocation_clear_all == _gf_true) + args.kind = CLRLK_ALL; + else + args.kind = CLRLK_GRANTED; + + if (list_empty (&dom->inodelk_list)) + goto out; + + pthread_mutex_lock (&pinode->mutex); + list_for_each_entry_safe (lk, tmp, &dom->inodelk_list, list) { + if (__stale_inodelk (this, lk, lock, &lk_age_sec) == _gf_true) { + revoke_lock = _gf_true; + reason_str = "age"; + break; + } + } + + max_blocked = priv->revocation_max_blocked; + if (max_blocked != 0 && revoke_lock == _gf_false) { + list_for_each_entry_safe (lk, tmp, &dom->blocked_inodelks, + blocked_locks) { + max_blocked--; + if (max_blocked == 0) { + revoke_lock = _gf_true; + reason_str = "max blocked"; + break; + } + } + } + pthread_mutex_unlock (&pinode->mutex); + +out: + if (revoke_lock == _gf_true) { + clrlk_clear_inodelk (this, pinode, dom, &args, &bcount, &gcount, + &op_errno); + gf_log (this->name, GF_LOG_WARNING, + "Lock revocation [reason: %s; gfid: %s; domain: %s; " + "age: %ld sec] - Inode lock revoked: %d granted & %d " + "blocked locks cleared", + reason_str, uuid_utoa (pinode->gfid), dom->domain, + lk_age_sec, gcount, bcount); + } + return revoke_lock; +} + /* Determine if lock is grantable or not */ static pl_inode_lock_t * __inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock) @@ -419,8 +519,6 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) { list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers, client_list) { - list_del_init (&l->client_list); - pl_inodelk_log_cleanup (l); pl_inode = l->pl_inode; @@ -458,6 +556,8 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) * forever. * iv. unref the object. */ + list_del_init (&l->client_list); + if (!list_empty (&l->list)) { __delete_inode_lock (l); list_add_tail (&l->client_list, @@ -509,6 +609,7 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom, inode_t *inode) { + posix_locks_private_t *priv = NULL; int ret = -EINVAL; pl_inode_lock_t *retlock = NULL; gf_boolean_t unref = _gf_true; @@ -518,6 +619,8 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, lock->pl_inode = pl_inode; fl_type = lock->fl_type; + priv = this->private; + /* Ideally, AFTER a successful lock (both blocking and non-blocking) or * an unsuccessful blocking lock operation, the inode needs to be ref'd. * @@ -537,6 +640,24 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, */ pl_inode->inode = inode_ref (inode); + if (priv->revocation_secs != 0) { + if (lock->fl_type != F_UNLCK) { + __inodelk_prune_stale (this, pl_inode, dom, lock); + } else if (priv->monkey_unlocking == _gf_true) { + if (pl_does_monkey_want_stuck_lock ()) { + pthread_mutex_lock (&pl_inode->mutex); + { + __pl_inodelk_unref (lock); + } + pthread_mutex_unlock (&pl_inode->mutex); + inode_unref (pl_inode->inode); + gf_log (this->name, GF_LOG_WARNING, + "MONKEY LOCKING (forcing stuck lock)!"); + return 0; + } + } + } + if (ctx) pthread_mutex_lock (&ctx->lock); pthread_mutex_lock (&pl_inode->mutex); diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index e363f425b65..8eb35da44be 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -190,6 +190,10 @@ typedef struct { mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */ gf_boolean_t trace; /* trace lock requests in and out */ char *brickname; + gf_boolean_t monkey_unlocking; + uint32_t revocation_secs; + gf_boolean_t revocation_clear_all; + uint32_t revocation_max_blocked; } posix_locks_private_t; diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index dff17e70aaf..8a142c9991a 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -3628,7 +3628,21 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("trace", priv->trace, options, bool, out); + GF_OPTION_RECONF ("monkey-unlocking", priv->monkey_unlocking, options, + bool, out); + + GF_OPTION_RECONF ("revocation-secs", + priv->revocation_secs, options, + uint32, out); + + GF_OPTION_RECONF ("revocation-clear-all", priv->revocation_clear_all, + options, bool, out); + + GF_OPTION_RECONF ("revocation-max-blocked", + priv->revocation_max_blocked, options, + uint32, out); ret = 0; + out: return ret; } @@ -3679,6 +3693,18 @@ init (xlator_t *this) GF_OPTION_INIT ("trace", priv->trace, bool, out); + GF_OPTION_INIT ("monkey-unlocking", priv->monkey_unlocking, + bool, out); + + GF_OPTION_INIT ("revocation-secs", priv->revocation_secs, + uint32, out); + + GF_OPTION_INIT ("revocation-clear-all", priv->revocation_clear_all, + bool, out); + + GF_OPTION_INIT ("revocation-max-blocked", priv->revocation_max_blocked, + uint32, out); + this->local_pool = mem_pool_new (pl_local_t, 32); if (!this->local_pool) { ret = -1; @@ -3935,5 +3961,35 @@ struct volume_options options[] = { .description = "Trace the different lock requests " "to logs." }, + { .key = { "monkey-unlocking" }, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Ignore a random number of unlock requests. Useful " + "for testing/creating robust lock recovery mechanisms." + }, + { .key = {"revocation-secs"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = INT_MAX, + .default_value = "0", + .description = "Maximum time a lock can be taken out, before" + "being revoked.", + }, + { .key = {"revocation-clear-all"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "If set to true, will revoke BOTH granted and blocked " + "(pending) lock requests if a revocation threshold is " + "hit.", + }, + { .key = {"revocation-max-blocked"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = INT_MAX, + .default_value = "0", + .description = "A number of blocked lock requests after which a lock " + "will be revoked to allow the others to proceed. Can " + "be used in conjunction w/ revocation-clear-all." + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 6de83a7ef4c..2e6aa378097 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2998,6 +2998,27 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_8_0, .flags = OPT_FLAG_CLIENT_OPT }, + { .option = "revocation-secs", + .key = "features.locks-revocation-secs", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_9_0, + }, + { .option = "revocation-clear-all", + .key = "features.locks-revocation-clear-all", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_9_0, + }, + { .option = "revocation-max-blocked", + .key = "features.locks-revocation-max-blocked", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_9_0, + }, + { .option = "monkey-unlocking", + .key = "features.locks-monkey-unlocking", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_9_0, + .type = NO_DOC, + }, { .key = NULL } }; |