diff options
| author | Joseph Fernandes <josferna@redhat.com> | 2015-06-06 10:28:51 +0530 | 
|---|---|---|
| committer | Dan Lambright <dlambrig@redhat.com> | 2015-06-27 03:19:42 -0700 | 
| commit | 8456e5b8cc92b61d340aadcbbcd58af25d302cec (patch) | |
| tree | 415a869f1da591d59558c07cb6977d6d42ef25f3 | |
| parent | 0c919396be6284f90de53cba9dede98980fa1692 (diff) | |
tier/dht: Fixing non-atomic promotion/demotion w.r.t. frequency period
This fixes the ping-pong issue, i.e. files getting demoted immediately
after promotion, caused by out-of-sync promotion/demotion processes.
The solution is to do promotion/demotion referring to the system time.
For the fix to work, all the file-serving nodes should have
their system time synchronized with each other, either manually or
using an NTP server.
NOTE: The ping-pong issue can reappear even with this fix if the admin
has set different promotion and demotion frequency periods, but this
would be under the control of the admin.
Backport of http://review.gluster.org/#/c/11110/ to 3.7.x:
> Change-Id: I1b33a5881d0cac143662ddb48e5b7b653aeb1271
> BUG: 1218717
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
> Reviewed-on: http://review.gluster.org/11110
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
> Tested-by: Dan Lambright <dlambrig@redhat.com>
> Tested-by: Gluster Build System <jenkins@build.gluster.com>
Signed-off-by: Joseph Fernandes <josferna@redhat.com>
Change-Id: I81bd1d677487ebc0fc46df4980500102571de68e
BUG: 1230857
Reviewed-on: http://review.gluster.org/11191
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Dan Lambright <dlambrig@redhat.com>
| -rw-r--r-- | xlators/cluster/dht/src/tier.c | 99 | 
1 files changed, 59 insertions, 40 deletions
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 3e1b20eaedc..c2a9d6b4ebd 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -734,7 +734,7 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)                          if (!db_path) {                                  gf_msg ("tier", GF_LOG_ERROR, 0,                                          DHT_MSG_LOG_TIER_STATUS, -                                        "Failed to allocate memory for bricklist"); +                                        "Failed. to allocate memory for bricklist");                                  goto out;                          } @@ -763,9 +763,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)          dict_t       *bricklist_cold = NULL;          dict_t       *bricklist_hot = NULL;          dht_conf_t   *conf     = NULL; -        int tick = 0; -        int next_demote = 0; -        int next_promote = 0; +        gfdb_time_t  current_time;          int freq_promote = 0;          int freq_demote = 0;          promotion_args_t promotion_args = { 0 }; @@ -775,6 +773,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)          int ret = 0;          pthread_t promote_thread;          pthread_t demote_thread; +        gf_boolean_t  is_promotion_triggered = _gf_false; +        gf_boolean_t  is_demotion_triggered = _gf_false;          conf   = this->private; @@ -789,16 +789,9 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)          tier_get_bricklist (conf->subvolumes[0], bricklist_cold);          tier_get_bricklist (conf->subvolumes[1], bricklist_hot); -        freq_promote = defrag->tier_promote_frequency; -        freq_demote  = defrag->tier_demote_frequency; - -        next_promote = defrag->tier_promote_frequency % TIMER_SECS; -        next_demote  = defrag->tier_demote_frequency % TIMER_SECS; - -          gf_msg (this->name, GF_LOG_INFO, 0, -                DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote 
%d demote %d", -                next_promote, next_demote); +                DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d" +                        " demote %d", freq_promote, freq_demote);          defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; @@ -806,9 +799,6 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)                  sleep(1); -                ret_promotion = -1; -                ret_demotion = -1; -                  if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {                          ret = 1;                          gf_msg (this->name, GF_LOG_ERROR, 0, @@ -820,7 +810,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)                  if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {                          ret = 0; -                        defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; +                        defrag->defrag_status = +                                        GF_DEFRAG_STATUS_COMPLETE;                          gf_msg (this->name, GF_LOG_DEBUG, 0,                                  DHT_MSG_LOG_TIER_ERROR,                                  "defrag->defrag_cmd == " @@ -828,49 +819,72 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)                          goto out;                  } -                tick = (tick + 1) % TIMER_SECS; +                freq_promote = defrag->tier_promote_frequency; +                freq_demote  = defrag->tier_demote_frequency; + -                if (freq_promote != defrag->tier_promote_frequency) -                        next_promote = tick; -                if (freq_demote != defrag->tier_demote_frequency) -                        next_demote = tick; +                /* To have proper synchronization amongst all +                 * brick holding nodes, so that promotion and demotions +                 * start atomicly w.r.t promotion/demotion frequency +                 * period, all nodes should have thier system time +                 * in-sync with each 
other either manually set or +                 * using a NTP server*/ +                ret = gettimeofday (&current_time, NULL); +                if (ret == -1) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "Failed to get current time"); +                        goto out; +                } -                if ((next_demote != tick) && (next_promote != tick)) +                is_demotion_triggered = ((current_time.tv_sec % +                                        freq_demote) == 0) ? _gf_true : +                                        _gf_false; +                is_promotion_triggered = ((current_time.tv_sec % +                                        freq_promote) == 0) ? _gf_true : +                                        _gf_false; + +                /* If no promotion and no demotion is +                 * scheduled/triggered skip a iteration */ +                if (!is_promotion_triggered && !is_demotion_triggered)                          continue; -                if (next_demote >= tick) { + +                ret_promotion = -1; +                ret_demotion = -1; + +                if (is_demotion_triggered) {                          demotion_args.this = this;                          demotion_args.brick_list = bricklist_hot;                          demotion_args.defrag = defrag;                          demotion_args.freq_time = freq_demote; -                        ret_demotion = pthread_create (&demote_thread, NULL, -                                        &tier_demote, &demotion_args); +                        ret_demotion = pthread_create (&demote_thread, +                                                NULL, &tier_demote, +                                                &demotion_args);                          if (ret_demotion) {                                  gf_msg (this->name, GF_LOG_ERROR, 0,                                          DHT_MSG_LOG_TIER_ERROR, -                                        
"Failed starting Demotion thread!"); +                                        "Failed starting Demotion " +                                        "thread!");                          } -                        freq_demote = defrag->tier_demote_frequency; -                        next_demote = (tick + freq_demote) % TIMER_SECS;                  } -                if (next_promote >= tick) { +                if (is_promotion_triggered) {                          promotion_args.this = this;                          promotion_args.brick_list = bricklist_cold;                          promotion_args.defrag = defrag;                          promotion_args.freq_time = freq_promote; -                        ret_promotion = pthread_create (&promote_thread, NULL, -                                                &tier_promote, &promotion_args); +                        ret_promotion = pthread_create (&promote_thread, +                                                NULL, &tier_promote, +                                                &promotion_args);                          if (ret_promotion) {                                  gf_msg (this->name, GF_LOG_ERROR, 0,                                          DHT_MSG_LOG_TIER_ERROR, -                                        "Failed starting Promotion thread!"); +                                        "Failed starting Promotion " +                                        "thread!");                          } -                        freq_promote = defrag->tier_promote_frequency; -                        next_promote = (tick + freq_promote) % TIMER_SECS;                  } -                if (ret_demotion == 0) { +                if (is_promotion_triggered && (ret_demotion == 0)) {                          pthread_join (demote_thread, NULL);                          if (demotion_args.return_value) {                                  gf_msg (this->name, GF_LOG_ERROR, 0, @@ -880,7 +894,7 @@ tier_start (xlator_t *this, 
gf_defrag_info_t *defrag)                          ret_demotion = demotion_args.return_value;                  } -                if (ret_promotion == 0) { +                if (is_demotion_triggered && (ret_promotion == 0)) {                          pthread_join (promote_thread, NULL);                          if (promotion_args.return_value) {                                  gf_msg (this->name, GF_LOG_ERROR, 0, @@ -890,10 +904,15 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)                          ret_promotion = promotion_args.return_value;                  } -                /*Collect previous and current cummulative status */ -                ret = ret | ret_demotion | ret_promotion; +                /* Collect previous and current cummulative status */ +                /* If demotion was not triggered just pass 0 to ret */ +                ret = (is_demotion_triggered) ? ret_demotion : 0; +                /* If promotion was not triggered just pass 0 to ret */ +                ret = ret | (is_promotion_triggered) ? +                                ret_promotion : 0; -                /*reseting promotion and demotion arguments for next iteration*/ +                /* reseting promotion and demotion arguments for +                 * next iteration*/                  memset (&demotion_args, 0, sizeof(demotion_args_t));                  memset (&promotion_args, 0, sizeof(promotion_args_t));  | 
