diff options
author | Dan Lambright <dlambrig@redhat.com> | 2015-12-16 15:28:57 -0500 |
---|---|---|
committer | Dan Lambright <dlambrig@redhat.com> | 2015-12-21 05:23:20 -0800 |
commit | 3a094f1de03b3da8cdff650c14e46aab87e1905b (patch) | |
tree | 8786ebd5baf1cf146d6b2e6efed77c58a9d53f0c /xlators/cluster/dht | |
parent | d2f48214d436be633efb1136ee951b0736935143 (diff) |
cluster/tier: do not block in synctask created from pause tier
We had run sleep() in the pause tier callback. Blocking within
a synctask is dangerous. The sleep() call does not inform
the synctask scheduler that a thread is no longer running.
It therefore believes it is running. If a second synctask already
exists, it may not be able to run. This occurs if the thread
limit in the pool has been reached.
Note the pool size only grows when a synctask is created, not
when it is moved from wait state to run state, as is the case
when an FOP completes. When the tier is paused during migration,
synctasks already exist waiting for responses to FOPs to the
server with high probability.
The fix is to yield() in the RPC callback, which will place
the synctask into the wait queue and free up a thread for the
FOP callback. A timer wakes the callback after sufficient
time has elapsed for the pause to occur.
Change-Id: I6a947ee04c6e5649946cb6d8207ba17263a67fc6
BUG: 1267950
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: http://review.gluster.org/12987
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 7 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 83 | ||||
-rw-r--r-- | xlators/cluster/dht/src/tier.c | 54 |
3 files changed, 126 insertions, 18 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index a6e9a408a44..70e5e15c0d8 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -16,6 +16,7 @@ #include "libxlator.h" #include "syncop.h" #include "refcount.h" +#include "timer.h" #ifndef _DHT_H #define _DHT_H @@ -356,6 +357,9 @@ typedef struct gf_tier_conf { uint64_t st_last_demoted_size; int request_pause; gf_boolean_t paused; + struct synctask *pause_synctask; + gf_timer_t *pause_timer; + pthread_mutex_t pause_mutex; } gf_tier_conf_t; struct gf_defrag_info_ { @@ -987,6 +991,9 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict); int gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag); +void +gf_defrag_wake_pause_tier (gf_tier_conf_t *defrag, gf_boolean_t pause); + int gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag); diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 7cac0657a83..387c764cb17 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -3619,31 +3619,88 @@ out: return 0; } +void +gf_defrag_wake_pause_tier (gf_tier_conf_t *tier_conf, gf_boolean_t pause) +{ + int woke = 0; + + pthread_mutex_lock (&tier_conf->pause_mutex); + if (tier_conf->pause_synctask) { + tier_conf->paused = pause; + synctask_wake (tier_conf->pause_synctask); + tier_conf->pause_synctask = 0; + woke = 1; + } + pthread_mutex_unlock (&tier_conf->pause_mutex); + tier_conf->request_pause = 0; + + gf_msg ("tier", GF_LOG_DEBUG, 0, + DHT_MSG_TIER_PAUSED, + "woken %d paused %d", woke, tier_conf->paused); +} + +void +gf_defrag_pause_tier_timeout (void *data) +{ + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + + this = (xlator_t *) data; + GF_VALIDATE_OR_GOTO ("tier", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + defrag = conf->defrag; + GF_VALIDATE_OR_GOTO (this->name, defrag, out); + + gf_msg (this->name, GF_LOG_DEBUG, 0, + DHT_MSG_TIER_PAUSED, + "Request pause timer timeout"); + + gf_defrag_wake_pause_tier (&defrag->tier_conf, _gf_false); + +out: + return; +} + int gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag) { - int poll = 0; - int ret = 0; - int usec_sleep = 100000; /* 1/10th of a sec */ - int poll_max = 15; /* 15 times = wait at most 3/2 sec */ + int ret = 0; + struct timespec delta = {0,}; + int delay = 2; if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) goto out; /* - * Set flag requesting to pause tiering. Wait a finite time for + * Set flag requesting to pause tiering. Wait 'delay' seconds for * tiering to actually stop as indicated by the "paused" boolean, * before returning success or failure. */ defrag->tier_conf.request_pause = 1; - for (poll = 0; poll < poll_max; poll++) { - if ((defrag->tier_conf.paused == _gf_true) || - (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)) { - goto out; - } - usleep (usec_sleep); - } + if (defrag->tier_conf.paused == _gf_true) + goto out; + + gf_msg (this->name, GF_LOG_DEBUG, 0, + DHT_MSG_TIER_PAUSED, + "Request pause tier"); + + defrag->tier_conf.pause_synctask = synctask_get (); + delta.tv_sec = delay; + delta.tv_nsec = 0; + defrag->tier_conf.pause_timer = + gf_timer_call_after (this->ctx, delta, + gf_defrag_pause_tier_timeout, + this); + + synctask_yield (defrag->tier_conf.pause_synctask); + + if (defrag->tier_conf.paused == _gf_true) + goto out; ret = -1; @@ -3661,7 +3718,7 @@ gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag) { gf_msg (this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME, - "Resume tiering"); + "Pause end. Resume tiering"); defrag->tier_conf.request_pause = 0; defrag->tier_conf.paused = _gf_false; diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 10036368720..2f415c4dbc2 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -1374,10 +1374,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) goto out; } - if (defrag->tier_conf.request_pause) - defrag->tier_conf.paused = _gf_true; - else - defrag->tier_conf.paused = _gf_false; + if (tier_conf->request_pause) + gf_defrag_wake_pause_tier (tier_conf, _gf_true); sleep(1); @@ -1808,6 +1806,8 @@ tier_init (xlator_t *this) defrag->tier_conf.request_pause = 0; + pthread_mutex_init (&defrag->tier_conf.pause_mutex, 0); + ret = dict_get_str (this->options, "tier-pause", &paused); @@ -1876,6 +1876,40 @@ out: int +tier_cli_pause_done (int op_ret, call_frame_t *sync_frame, void *data) +{ + gf_msg ("tier", GF_LOG_INFO, 0, + DHT_MSG_TIER_PAUSED, + "Migrate file paused with op_ret %d", op_ret); + + return op_ret; +} + +int +tier_cli_pause (void *data) +{ + gf_defrag_info_t *defrag = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + + this = data; + + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, exit); + + defrag = conf->defrag; + GF_VALIDATE_OR_GOTO (this->name, defrag, exit); + + gf_defrag_pause_tier (this, defrag); + + ret = 0; +exit: + return ret; +} + + +int tier_reconfigure (xlator_t *this, dict_t *options) { dht_conf_t *conf = NULL; @@ -1884,6 +1918,7 @@ tier_reconfigure (xlator_t *this, dict_t *options) int migrate_mb = 0; gf_boolean_t req_pause = _gf_false; int ret = 0; + call_frame_t *frame = NULL; conf = this->private; @@ -1932,7 +1967,16 @@ tier_reconfigure (xlator_t *this, dict_t *options) bool, out); if (req_pause == _gf_true) { - ret = gf_defrag_pause_tier (this, defrag); + + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + + frame->root->pid = GF_CLIENT_PID_DEFRAG; + + ret = synctask_new (this->ctx->env, tier_cli_pause, + tier_cli_pause_done, frame, this); + if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |