diff options
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 11 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-messages.h | 19 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 125 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 5 | ||||
-rw-r--r-- | xlators/cluster/dht/src/tier.c | 68 |
6 files changed, 227 insertions, 7 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index e2c9f5547ac..acbe3f40eea 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -7661,10 +7661,14 @@ dht_notify (xlator_t *this, int event, void *data, ...) cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER) gf_defrag_stop (defrag, GF_DEFRAG_STATUS_STOPPED, output); + else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER) + ret = gf_defrag_pause_tier (this, defrag); + else if (cmd == GF_DEFRAG_CMD_RESUME_TIER) + ret = gf_defrag_resume_tier (this, defrag); } unlock: UNLOCK (&defrag->lock); - return 0; + return ret; break; } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 95ca7067806..6483b2e86d7 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -293,7 +293,8 @@ enum gf_defrag_type { GF_DEFRAG_CMD_STATUS_TIER = 1 + 6, GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, - + GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, + GF_DEFRAG_CMD_RESUME_TIER = 1 + 10, }; typedef enum gf_defrag_type gf_defrag_type; @@ -353,6 +354,8 @@ typedef struct gf_tier_conf { int tier_demote_frequency; uint64_t st_last_promoted_size; uint64_t st_last_demoted_size; + int request_pause; + gf_boolean_t paused; } gf_tier_conf_t; struct gf_defrag_info_ { @@ -982,6 +985,12 @@ int gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict); int +gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag); + +int +gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag); + +int gf_defrag_start_detach_tier (gf_defrag_info_t *defrag); int diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 61631e682f8..8960ac738ec 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT -#define GLFS_DHT_NUM_MESSAGES 107 +#define GLFS_DHT_NUM_MESSAGES 109 #define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1) /* Messages with message IDs */ @@ -1002,5 +1002,22 @@ #define DHT_MSG_LOG_IPC_TIER_ERROR (GLFS_DHT_BASE + 107) +/* + * @messageid 109108 + * @diagnosis + * @recommendedaction None + */ + +#define DHT_MSG_TIER_PAUSED (GLFS_DHT_BASE + 108) + +/* + * @messageid 109108 + * @diagnosis + * @recommendedaction None + */ + +#define DHT_MSG_TIER_RESUME (GLFS_DHT_BASE + 109) + + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* _DHT_MESSAGES_H_ */ diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index b3c25ba9ee2..fe648f07e8e 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -727,7 +727,7 @@ out: static int __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, - uint64_t ia_size, int hole_exists) + uint64_t ia_size, int hole_exists) { int ret = 0; int count = 0; @@ -779,6 +779,68 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst return ret; } +static int +__tier_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, + uint64_t ia_size, int hole_exists) +{ + int ret = 0; + int count = 0; + off_t offset = 0; + struct iovec *vector = NULL; + struct iobref *iobref = NULL; + uint64_t total = 0; + size_t read_size = 0; + + /* if file size is '0', no need to enter this loop */ + while (total < ia_size) { + + read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ? + DHT_REBALANCE_BLKSIZE : (ia_size - total)); + + ret = syncop_readv (from, src, read_size, + offset, 0, &vector, &count, &iobref, NULL, + NULL); + if (!ret || (ret < 0)) { + break; + } + + if (hole_exists) + ret = dht_write_with_holes (to, dst, vector, count, + ret, offset, iobref); + else + ret = syncop_writev (to, dst, vector, count, + offset, iobref, 0, NULL, NULL); + if (defrag->tier_conf.request_pause) { + gf_msg ("tier", GF_LOG_INFO, 0, + DHT_MSG_TIER_PAUSED, + "Migrate file paused"); + ret = -1; + } + + if (ret < 0) { + break; + } + offset += ret; + total += ret; + + GF_FREE (vector); + if (iobref) + iobref_unref (iobref); + iobref = NULL; + vector = NULL; + } + if (iobref) + iobref_unref (iobref); + GF_FREE (vector); + + if (ret >= 0) + ret = 0; + else + ret = -1; + + return ret; +} + static int __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, @@ -1251,8 +1313,14 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, /* All I/O happens in this function */ - ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd, - stbuf.ia_size, file_has_holes); + if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { + ret = __tier_migrate_data (defrag, from, to, src_fd, dst_fd, + stbuf.ia_size, file_has_holes); + } else { + ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd, + stbuf.ia_size, file_has_holes); + } + if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, @@ -3415,6 +3483,57 @@ out: } int +gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag) +{ + int poll = 0; + int ret = 0; + int usec_sleep = 100000; /* 1/10th of a sec */ + int poll_max = 15; /* 15 times = wait at most 3/2 sec */ + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) + goto out; + + /* + * Set flag requesting to pause tiering. Wait a finite time for + * tiering to actually stop as indicated by the "paused" boolean, + * before returning success or failure. + */ + defrag->tier_conf.request_pause = 1; + + for (poll = 0; poll < poll_max; poll++) { + if ((defrag->tier_conf.paused == _gf_true) || + (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)) { + goto out; + } + + usleep (usec_sleep); + } + + ret = -1; + +out: + + gf_msg (this->name, GF_LOG_DEBUG, 0, + DHT_MSG_TIER_PAUSED, + "Pause tiering ret=%d", ret); + + return ret; +} + +int +gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag) +{ + gf_msg (this->name, GF_LOG_DEBUG, 0, + DHT_MSG_TIER_RESUME, + "Resume tiering"); + + defrag->tier_conf.request_pause = 0; + defrag->tier_conf.paused = _gf_false; + + return 0; +} + +int gf_defrag_start_detach_tier (gf_defrag_info_t *defrag) { defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 4d700482919..718f497bb03 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -963,6 +963,11 @@ struct volume_options options[] = { }, /* tier options */ + { .key = {"tier-pause"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, + { .key = {"tier-promote-frequency"}, .type = GF_OPTION_TYPE_INT, .default_value = "120", diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index d85fe41dcb0..9dcbc760330 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -307,6 +307,13 @@ tier_migrate_using_query_file (void *_args) per_file_status = 0; per_link_status = 0; + if (defrag->tier_conf.request_pause) { + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_LOG_TIER_STATUS, + "Tiering paused. Exiting tier_migrate_using_query_file"); + break; + } + memset (gfid_str, 0, UUID_CANONICAL_FORM_LEN+1); memset (query_record->_link_info_str, 0, DB_QUERY_RECORD_SIZE); @@ -368,6 +375,14 @@ tier_migrate_using_query_file (void *_args) /* Per link of file */ while (token_str != NULL) { + if (defrag->tier_conf.request_pause) { + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_LOG_TIER_STATUS, + "Tiering paused. " + "Exiting tier_migrate_using_query_file"); + goto abort; + } + link_str = gf_strdup (token_str); if (!link_info) { @@ -485,6 +500,14 @@ tier_migrate_using_query_file (void *_args) gf_uuid_copy (loc.gfid, loc.inode->gfid); + if (defrag->tier_conf.request_pause) { + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_LOG_TIER_STATUS, + "Tiering paused. " + "Exiting tier_migrate_using_query_file"); + goto abort; + } + ret = syncop_setxattr (this, &loc, migrate_data, 0, NULL, NULL); if (ret) { @@ -1347,6 +1370,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) goto out; } + if (defrag->tier_conf.request_pause) + defrag->tier_conf.paused = _gf_true; + else + defrag->tier_conf.paused = _gf_false; + sleep(1); if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { @@ -1368,6 +1396,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) goto out; } + if ((defrag->tier_conf.paused) || + (defrag->tier_conf.request_pause)) + continue; + + /* To have proper synchronization amongst all * brick holding nodes, so that promotion and demotions * start atomicly w.r.t promotion/demotion frequency @@ -1658,6 +1691,7 @@ tier_init (xlator_t *this) gf_defrag_info_t *defrag = NULL; char *voldir = NULL; char *mode = NULL; + char *paused = NULL; ret = dht_init (this); if (ret) { @@ -1776,7 +1810,15 @@ tier_init (xlator_t *this) defrag->tier_conf.mode = ret; } - ret = gf_asprintf (&voldir, "%s/%s", + defrag->tier_conf.request_pause = 0; + + ret = dict_get_str (this->options, + "tier-pause", &paused); + + if (paused && strcmp (paused, "on") == 0) + defrag->tier_conf.request_pause = 1; + + ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name); if (ret < 0) @@ -1844,6 +1886,9 @@ tier_reconfigure (xlator_t *this, dict_t *options) gf_defrag_info_t *defrag = NULL; char *mode = NULL; int migrate_mb = 0; + gf_boolean_t req_pause = _gf_false; + int ret = 0; + conf = this->private; if (conf->defrag) { @@ -1885,6 +1930,27 @@ tier_reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("tier-max-files", defrag->tier_conf.max_migrate_files, options, int32, out); + + GF_OPTION_RECONF ("tier-pause", + req_pause, options, + bool, out); + + if (req_pause == _gf_true) { + ret = gf_defrag_pause_tier (this, defrag); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_LOG_TIER_ERROR, + "pause tier failed on reconfigure"); + } + } else { + ret = gf_defrag_resume_tier (this, defrag); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_LOG_TIER_ERROR, + "resume tier failed on reconfigure"); + } + } + } out: |