diff options
| -rw-r--r-- | tests/features/lock_revocation.t | 52 | ||||
| -rw-r--r-- | xlators/features/locks/src/clear.c | 4 | ||||
| -rw-r--r-- | xlators/features/locks/src/common.c | 13 | ||||
| -rw-r--r-- | xlators/features/locks/src/common.h | 3 | ||||
| -rw-r--r-- | xlators/features/locks/src/entrylk.c | 124 | ||||
| -rw-r--r-- | xlators/features/locks/src/inodelk.c | 125 | ||||
| -rw-r--r-- | xlators/features/locks/src/locks.h | 4 | ||||
| -rw-r--r-- | xlators/features/locks/src/posix.c | 56 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 21 | 
9 files changed, 394 insertions, 8 deletions
diff --git a/tests/features/lock_revocation.t b/tests/features/lock_revocation.t new file mode 100644 index 00000000000..cbf21b71650 --- /dev/null +++ b/tests/features/lock_revocation.t @@ -0,0 +1,52 @@ +#!/bin/bash +logdir=$(gluster --print-logdir) +BRICK_LOGFILES="$logdir/bricks/d-backends-brick?.log" +rm -f $BRICK_LOGFILES &> /dev/null + +# Test that lock revocation works + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +cleanup; + +function deadlock_fop() { +  local MNT=$1 +  for i in {1..1000}; do +    dd if=/dev/zero of=$MNT/testfile bs=1k count=10 &> /dev/null +    if grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null; then +      break +    fi +  done +} + +function monkey_unlock() { +  grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null && echo SUCCESS +  return 0 +} + +function append_to_file() { +  local FILE_PATH=$1 +  echo "hello" >> $FILE_PATH +  return 0 +} + +#Init +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 features.locks-monkey-unlocking on +TEST $CLI volume set $V0 features.locks-revocation-secs 2 +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 -s $H0 $M0; +TEST $GFS --volfile-id=$V0 -s $H0 $M1; + +# Deadlock writes to a file using monkey unlocking +deadlock_fop $M0 & +EXPECT_WITHIN 60 "SUCCESS" monkey_unlock + +# Sleep > unlock timeout and attempt to write to the file +sleep 3 +TEST append_to_file $M1/testfile + +cleanup diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c index 640c6bb5553..d7c210f24a5 100644 --- a/xlators/features/locks/src/clear.c +++ b/xlators/features/locks/src/clear.c @@ -234,6 +234,7 @@ blkd:                                  continue;                          bcount++; +                        list_del_init (&ilock->client_list);                          list_del_init (&ilock->blocked_locks);                          list_add (&ilock->blocked_locks, &released);                  } @@ -268,6 +269,7 @@ granted:                                  continue;                          gcount++; +                        list_del_init (&ilock->client_list);                          list_del_init (&ilock->list);                          list_add (&ilock->list, &released);                  } @@ -321,6 +323,7 @@ blkd:                          bcount++; +                        list_del_init (&elock->client_list);                          list_del_init (&elock->blocked_locks);                          list_add_tail (&elock->blocked_locks, &released);                  } @@ -355,6 +358,7 @@ granted:                          }                          gcount++; +                        list_del_init (&elock->client_list);                          list_del_init (&elock->domain_list);                          list_add_tail (&elock->domain_list, &removed); diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 123c1101e94..796b538f6f2 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -1121,3 +1121,16 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)          return conf;  } + +gf_boolean_t +pl_does_monkey_want_stuck_lock() +{ +        long int          monkey_unlock_rand = 0; +        long int          monkey_unlock_rand_rem = 0; + +        monkey_unlock_rand = random (); +        monkey_unlock_rand_rem = monkey_unlock_rand % 100; +        if (monkey_unlock_rand_rem == 0) +                return _gf_true; +        return _gf_false; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 5486f9b8314..3729ca24bed 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -161,4 +161,7 @@ pl_metalock_is_active (pl_inode_t *pl_inode);  int  __pl_queue_lock (pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block); + +gf_boolean_t +pl_does_monkey_want_stuck_lock();  #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index 783c57e6381..4231d760cdc 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -16,9 +16,9 @@  #include "list.h"  #include "locks.h" +#include "clear.h"  #include "common.h" -  void  __pl_entrylk_unref (pl_entry_lock_t *lock)  { @@ -111,6 +111,97 @@ __conflicting_entrylks (pl_entry_lock_t *l1, pl_entry_lock_t *l2)          return 0;  } +/* See comments in inodelk.c for details */ +static inline gf_boolean_t +__stale_entrylk (xlator_t *this, pl_entry_lock_t *candidate_lock, +                pl_entry_lock_t *requested_lock, time_t *lock_age_sec) +{ +        posix_locks_private_t  *priv = NULL; +        struct timeval curr; +        gettimeofday (&curr, NULL); + +        priv = this->private; + +        /* Question: Should we just prune them all given the +         * chance?  Or just the locks we are attempting to acquire? +         */ +        if (names_conflict (candidate_lock->basename, +                        requested_lock->basename)) { +                *lock_age_sec = curr.tv_sec - +                        candidate_lock->granted_time.tv_sec; +                if (*lock_age_sec > priv->revocation_secs) +                        return _gf_true; +        } +        return _gf_false; +} + +/* See comments in inodelk.c for details */ +static gf_boolean_t +__entrylk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, +                 pl_entry_lock_t *lock) +{ +        posix_locks_private_t  *priv = NULL; +        pl_entry_lock_t *tmp = NULL; +        pl_entry_lock_t *lk = NULL; +        gf_boolean_t revoke_lock = _gf_false; +        int bcount = 0; +        int gcount = 0; +        int op_errno = 0; +        clrlk_args args; +        args.opts = NULL; +        time_t lk_age_sec = 0; +        uint32_t max_blocked = 0; +        char *reason_str = NULL; + +        priv = this->private; +        args.type = CLRLK_ENTRY; +        if (priv->revocation_clear_all == _gf_true) +                args.kind = CLRLK_ALL; +        else +                args.kind = CLRLK_GRANTED; + + +        if (list_empty (&dom->entrylk_list)) +                goto out; + +        pthread_mutex_lock (&pinode->mutex); +        lock->pinode = pinode; +        list_for_each_entry_safe (lk, tmp, &dom->entrylk_list, domain_list) { +                if (__stale_entrylk (this, lk, lock, &lk_age_sec) == _gf_true) { +                        revoke_lock = _gf_true; +                        reason_str = "age"; +                        break; +                } +        } +        max_blocked = priv->revocation_max_blocked; +        if (max_blocked != 0 && revoke_lock == _gf_false) { +                list_for_each_entry_safe (lk, tmp, &dom->blocked_entrylks, +                                blocked_locks) { +                        max_blocked--; +                        if (max_blocked == 0) { +                                revoke_lock = _gf_true; +                                reason_str = "max blocked"; +                                break; +                        } +                } +        } +        pthread_mutex_unlock (&pinode->mutex); + +out: +        if (revoke_lock == _gf_true) { +                clrlk_clear_entrylk (this, pinode, dom, &args, &bcount, &gcount, +                    &op_errno); +                gf_log (this->name, GF_LOG_WARNING, +                        "Lock revocation [reason: %s; gfid: %s; domain: %s; " +                        "age: %ld sec] - Entry lock revoked:  %d granted & %d " +                        "blocked locks cleared", reason_str, +                        uuid_utoa (pinode->gfid), dom->domain, lk_age_sec, +                        gcount, bcount); +        } + +        return revoke_lock; +} +  /**   * entrylk_grantable - is this lock grantable?   * @inode: inode in which to look @@ -546,6 +637,9 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,          pl_ctx_t        *ctx              =  NULL;  	int              nonblock         =  0;          gf_boolean_t     need_inode_unref =  _gf_false; +        posix_locks_private_t  *priv = NULL; + +        priv = this->private;          if (xdata)                  dict_ret = dict_get_str (xdata, "connection-id", &conn_id); @@ -599,6 +693,24 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,           * current stack unwinds.           */          pinode->inode = inode_ref (inode); +        if (priv->revocation_secs != 0) { +                if (cmd != ENTRYLK_UNLOCK) { +                        __entrylk_prune_stale (this, pinode, dom, reqlock); +                } else if (priv->monkey_unlocking == _gf_true) { +                        if (pl_does_monkey_want_stuck_lock ()) { +                                gf_log (this->name, GF_LOG_WARNING, +                                    "MONKEY LOCKING (forcing stuck lock)!"); +                                op_ret = 0; +                                need_inode_unref = _gf_true; +                                pthread_mutex_lock (&pinode->mutex); +                                { +                                        __pl_entrylk_unref (reqlock); +                                } +                                pthread_mutex_unlock (&pinode->mutex); +                                goto out; +                        } +                } +        }          switch (cmd) {          case ENTRYLK_LOCK_NB: @@ -678,9 +790,6 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,                          "a bug report at http://bugs.gluster.com", cmd);                  goto out;          } -        if (need_inode_unref) -                inode_unref (pinode->inode); -          /* The following (extra) unref corresponds to the ref that           * was done at the time the lock was granted.           */ @@ -689,6 +798,9 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,  out: +        if (need_inode_unref) +                inode_unref (pinode->inode); +          if (unwind) {                  entrylk_trace_out (this, frame, volume, fd, loc, basename,                                     cmd, type, op_ret, op_errno); @@ -772,8 +884,6 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)          {                  list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,  					  client_list) { -                        list_del_init (&l->client_list); -  			pl_entrylk_log_cleanup (l);  			pinode = l->pinode; @@ -810,6 +920,8 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)                           *      blocked to avoid leaving L1 to starve forever.                           * iv.  unref the object.                           */ +                                list_del_init (&l->client_list); +                                  if (!list_empty (&l->domain_list)) {                                          list_del_init (&l->domain_list);                                          list_add_tail (&l->client_list, diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index 1564f26b8fb..e1702c78ba1 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -16,6 +16,7 @@  #include "list.h"  #include "locks.h" +#include "clear.h"  #include "common.h"  void @@ -130,6 +131,105 @@ inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)                  inodelk_type_conflict (l1, l2));  } +/* + * Check to see if the candidate lock overlaps/conflicts with the + * requested lock.  If so, determine how old the lock is and return + * true if it exceeds the configured threshold, false otherwise. + */ +static inline gf_boolean_t +__stale_inodelk (xlator_t *this, pl_inode_lock_t *candidate_lock, +                pl_inode_lock_t *requested_lock, time_t *lock_age_sec) +{ +        posix_locks_private_t  *priv = NULL; +        struct timeval curr; + +        priv = this->private; +        gettimeofday (&curr, NULL); +        /* Question: Should we just prune them all given the +         * chance?  Or just the locks we are attempting to acquire? +         */ +        if (inodelk_conflict (candidate_lock, requested_lock)) { +                *lock_age_sec = curr.tv_sec - +                        candidate_lock->granted_time.tv_sec; +                if (*lock_age_sec > priv->revocation_secs) +                        return _gf_true; +        } +        return _gf_false; +} + +/* Examine any locks held on this inode and potentially revoke the lock + * if the age exceeds revocation_secs.  We will clear _only_ those locks + * which are granted, and then grant those locks which are blocked. + * + * Depending on how this patch works in the wild, we may expand this and + * introduce a heuristic which clears blocked locks as well if they + * are beyond a threshold. + */ +static gf_boolean_t +__inodelk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, +                       pl_inode_lock_t *lock) +{ +        posix_locks_private_t  *priv = NULL; +        pl_inode_lock_t *tmp = NULL; +        pl_inode_lock_t *lk = NULL; +        gf_boolean_t revoke_lock = _gf_false; +        int bcount = 0; +        int gcount = 0; +        int op_errno = 0; +        clrlk_args args; +        args.opts = NULL; +        time_t lk_age_sec = 0; +        uint32_t max_blocked = 0; +        char *reason_str = NULL; + +        priv = this->private; + +        args.type = CLRLK_INODE; +        if (priv->revocation_clear_all == _gf_true) +                args.kind = CLRLK_ALL; +        else +                args.kind = CLRLK_GRANTED; + +        if (list_empty (&dom->inodelk_list)) +                goto out; + +        pthread_mutex_lock (&pinode->mutex); +        list_for_each_entry_safe (lk, tmp, &dom->inodelk_list, list) { +                if (__stale_inodelk (this, lk, lock, &lk_age_sec) == _gf_true) { +                        revoke_lock = _gf_true; +                        reason_str = "age"; +                        break; +                } +        } + +        max_blocked = priv->revocation_max_blocked; +        if (max_blocked != 0 && revoke_lock == _gf_false) { +                list_for_each_entry_safe (lk, tmp, &dom->blocked_inodelks, +                                blocked_locks) { +                        max_blocked--; +                        if (max_blocked == 0) { +                                revoke_lock = _gf_true; +                                reason_str = "max blocked"; +                                break; +                        } +                } +        } +        pthread_mutex_unlock (&pinode->mutex); + +out: +        if (revoke_lock == _gf_true) { +                clrlk_clear_inodelk (this, pinode, dom, &args, &bcount, &gcount, +                        &op_errno); +                gf_log (this->name, GF_LOG_WARNING, +                        "Lock revocation [reason: %s; gfid: %s; domain: %s; " +                        "age: %ld sec] - Inode lock revoked:  %d granted & %d " +                        "blocked locks cleared", +                        reason_str, uuid_utoa (pinode->gfid), dom->domain, +                        lk_age_sec, gcount, bcount); +        } +        return revoke_lock; +} +  /* Determine if lock is grantable or not */  static pl_inode_lock_t *  __inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock) @@ -419,8 +519,6 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)          {                  list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,  					  client_list) { -                        list_del_init (&l->client_list); -  			pl_inodelk_log_cleanup (l);  			pl_inode = l->pl_inode; @@ -458,6 +556,8 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)                           *      forever.                           * iv.  unref the object.                           */ +                                list_del_init (&l->client_list); +                                  if (!list_empty (&l->list)) {                                          __delete_inode_lock (l);                                          list_add_tail (&l->client_list, @@ -509,6 +609,7 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,  		pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,                  inode_t *inode)  { +        posix_locks_private_t  *priv = NULL;          int               ret              = -EINVAL;          pl_inode_lock_t  *retlock          =  NULL;          gf_boolean_t      unref            =  _gf_true; @@ -518,6 +619,8 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,  	lock->pl_inode = pl_inode;          fl_type = lock->fl_type; +        priv = this->private; +          /* Ideally, AFTER a successful lock (both blocking and non-blocking) or           * an unsuccessful blocking lock operation, the inode needs to be ref'd.           * @@ -537,6 +640,24 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,           */          pl_inode->inode = inode_ref (inode); +        if (priv->revocation_secs != 0) { +                if (lock->fl_type != F_UNLCK) { +                        __inodelk_prune_stale (this, pl_inode, dom, lock); +                } else if (priv->monkey_unlocking == _gf_true) { +                        if (pl_does_monkey_want_stuck_lock ()) { +                                pthread_mutex_lock (&pl_inode->mutex); +                                { +                                        __pl_inodelk_unref (lock); +                                } +                                pthread_mutex_unlock (&pl_inode->mutex); +                                inode_unref (pl_inode->inode); +                                gf_log (this->name, GF_LOG_WARNING, +                                    "MONKEY LOCKING (forcing stuck lock)!"); +                                return 0; +                        } +                } +        } +  	if (ctx)  		pthread_mutex_lock (&ctx->lock);          pthread_mutex_lock (&pl_inode->mutex); diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index e363f425b65..8eb35da44be 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -190,6 +190,10 @@ typedef struct {          mlk_mode_t      mandatory_mode; /* holds current mandatory locking mode */          gf_boolean_t    trace;          /* trace lock requests in and out */          char           *brickname; +        gf_boolean_t    monkey_unlocking; +        uint32_t        revocation_secs; +        gf_boolean_t    revocation_clear_all; +        uint32_t        revocation_max_blocked;  } posix_locks_private_t; diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index dff17e70aaf..8a142c9991a 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -3628,7 +3628,21 @@ reconfigure (xlator_t *this, dict_t *options)          GF_OPTION_RECONF ("trace", priv->trace, options, bool, out); +        GF_OPTION_RECONF ("monkey-unlocking", priv->monkey_unlocking, options, +                          bool, out); + +        GF_OPTION_RECONF ("revocation-secs", +                          priv->revocation_secs, options, +                          uint32, out); + +        GF_OPTION_RECONF ("revocation-clear-all", priv->revocation_clear_all, +                          options, bool, out); + +        GF_OPTION_RECONF ("revocation-max-blocked", +                          priv->revocation_max_blocked, options, +                          uint32, out);          ret = 0; +  out:          return ret;  } @@ -3679,6 +3693,18 @@ init (xlator_t *this)          GF_OPTION_INIT ("trace", priv->trace, bool, out); +        GF_OPTION_INIT ("monkey-unlocking", priv->monkey_unlocking, +                        bool, out); + +        GF_OPTION_INIT ("revocation-secs", priv->revocation_secs, +                        uint32, out); + +        GF_OPTION_INIT ("revocation-clear-all", priv->revocation_clear_all, +                        bool, out); + +        GF_OPTION_INIT ("revocation-max-blocked", priv->revocation_max_blocked, +                        uint32, out); +          this->local_pool = mem_pool_new (pl_local_t, 32);          if (!this->local_pool) {                  ret = -1; @@ -3935,5 +3961,35 @@ struct volume_options options[] = {            .description = "Trace the different lock requests "                           "to logs."          }, +        { .key  = { "monkey-unlocking" }, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "false", +          .description = "Ignore a random number of unlock requests.  Useful " +                         "for testing/creating robust lock recovery mechanisms." +        }, +        { .key = {"revocation-secs"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0, +          .max = INT_MAX, +          .default_value = "0", +          .description = "Maximum time a lock can be taken out, before" +                         "being revoked.", +        }, +        { .key = {"revocation-clear-all"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "false", +          .description = "If set to true, will revoke BOTH granted and blocked " +                         "(pending) lock requests if a revocation threshold is " +                         "hit.", +        }, +        { .key = {"revocation-max-blocked"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0, +          .max = INT_MAX, +          .default_value = "0", +          .description = "A number of blocked lock requests after which a lock " +                         "will be revoked to allow the others to proceed.  Can " +                         "be used in conjunction w/ revocation-clear-all." +        },          { .key = {NULL} },  }; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 6de83a7ef4c..2e6aa378097 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2998,6 +2998,27 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .op_version = GD_OP_VERSION_3_8_0,            .flags      = OPT_FLAG_CLIENT_OPT          }, +        { .option      = "revocation-secs", +          .key         = "features.locks-revocation-secs", +          .voltype     = "features/locks", +          .op_version  = GD_OP_VERSION_3_9_0, +        }, +        { .option      = "revocation-clear-all", +          .key         = "features.locks-revocation-clear-all", +          .voltype     = "features/locks", +          .op_version  = GD_OP_VERSION_3_9_0, +        }, +        { .option      = "revocation-max-blocked", +          .key         = "features.locks-revocation-max-blocked", +          .voltype     = "features/locks", +          .op_version  = GD_OP_VERSION_3_9_0, +        }, +        { .option      = "monkey-unlocking", +          .key         = "features.locks-monkey-unlocking", +          .voltype     = "features/locks", +          .op_version  = GD_OP_VERSION_3_9_0, +          .type        = NO_DOC, +        },          { .key         = NULL          }  };  | 
