diff options
author | Venky Shankar <vshankar@redhat.com> | 2014-06-18 23:36:48 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-07-11 00:08:57 -0700 |
commit | 6532a65b56a652622612a6edcd03fff90fbeff0f (patch) | |
tree | 3b86ab6dc950160f0996ffe5d73d88f11f66c3d4 | |
parent | c7251ebfe8e14090f9420786973c5f8232555b8e (diff) |
features/changelog: prevent deadlock on thread cancellation
helper threads (fsync, rollover) wake up periodically and perform
their respective operation under a lock (crt->lock). These threads
are also subjected to cancellation under some circumstance such as
disabling changelog. This is inherently dangerous when funtions
which are cancellation points for pthread_cancel(3) are used
in the locked region.
Consider this
pthread_mutex_lock(&mutex);
{
/* ... */
ret = fsync (fd); <-- cancellation point
/* ... */
}
pthread_mutex_unlock(&mutex);
A pthread_cancel(3) by another thread just before fsync(3) but
after pthread_mutex_lock(3) would result in the thread getting
cancelled when fsync(3) is invoked, thereby never unlocking the
mutex. Moreover, in case of changelog translator, the locked
region (under crt->lock in changelog-rt.c) is also the code
path for fop changelog updation. Therefore, unlocking the
mutex in thread cleanup handler (pthread_cleanup_pop(3)) might
prematurely release the mutex during fop updation path.
This patch fixes such problems existing in fsync and rollover
threads. Fix is to enter the locked region with cancellation
disabled and enable it after mutex unlock. Also, test for a
cancellation request early on in case none of the functions
are cancellation points.
Change-Id: I1795627a12827609c1da659d07fc1457ffa033de
BUG: 1110917
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: http://review.gluster.org/8106
Reviewed-by: Kotresh HR <khiremat@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r-- | tests/bugs/bug-1110917.t | 39 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-helpers.c | 34 |
2 files changed, 73 insertions, 0 deletions
diff --git a/tests/bugs/bug-1110917.t b/tests/bugs/bug-1110917.t new file mode 100644 index 00000000000..927b2342192 --- /dev/null +++ b/tests/bugs/bug-1110917.t @@ -0,0 +1,39 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2; +TEST $CLI volume start $V0; + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +TEST $CLI volume set $V0 changelog on +TEST $CLI volume set $V0 changelog.fsync-interval 1 + +# perform I/O on the background +f=$(basename `mktemp`) +dd if=/dev/urandom of=$M0/$f count=100000 bs=4k & + +# this is the best we can do without inducing _error points_ in the code +# without the patch reconfigre() would hang... +TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1` +TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1` + +TEST $CLI volume set $V0 changelog off +TEST $CLI volume set $V0 changelog on +TEST $CLI volume set $V0 changelog off +TEST $CLI volume set $V0 changelog on + +TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1` +TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1` + +# if there's a deadlock, this would hang +wait; + +cleanup; diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index b271f3f0fc0..bb8150bd719 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -25,6 +25,28 @@ #include "changelog-encoders.h" #include <pthread.h> +inline void +__mask_cancellation (xlator_t *this) +{ + int ret = 0; + + ret = pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to disable thread cancellation"); +} + +inline void +__unmask_cancellation (xlator_t *this) +{ + int ret = 0; + + ret = pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to enable thread cancellation"); +} + static void changelog_cleanup_free_mutex (void *arg_mutex) { @@ -618,6 +640,8 @@ changelog_rollover (void *data) slice = &priv->slice; while (1) { + (void) pthread_testcancel(); + tv.tv_sec = priv->rollover_time; tv.tv_usec = 0; FD_ZERO(&rset); @@ -708,6 +732,8 @@ changelog_rollover (void *data) continue; } + __mask_cancellation (this); + LOCK (&priv->lock); { ret = changelog_inject_single_event (this, priv, &cld); @@ -715,6 +741,8 @@ changelog_rollover (void *data) SLICE_VERSION_UPDATE (slice); } UNLOCK (&priv->lock); + + __unmask_cancellation (this); } return NULL; @@ -733,6 +761,8 @@ changelog_fsync_thread (void *data) cld.cld_type = CHANGELOG_TYPE_FSYNC; while (1) { + (void) pthread_testcancel(); + tv.tv_sec = priv->fsync_interval; tv.tv_usec = 0; @@ -740,10 +770,14 @@ changelog_fsync_thread (void *data) if (ret) continue; + __mask_cancellation (this); + ret = changelog_inject_single_event (this, priv, &cld); if (ret) gf_log (this->name, GF_LOG_ERROR, "failed to inject fsync event"); + + __unmask_cancellation (this); } return NULL; |