diff options
author | Dan Lambright <dlambrig@redhat.com> | 2015-10-05 19:52:02 +0000 |
---|---|---|
committer | Dan Lambright <dlambrig@redhat.com> | 2015-10-21 15:44:35 -0700 |
commit | 36974c36fa4231df3f0e9428a9da6d1aa33348ab (patch) | |
tree | cbd0c604e4f4fdcd39dbeef2f2713b3d494957f6 /xlators/cluster/dht/src | |
parent | 802b24d4d9da09dfb1479eb54bd4a74f678bc97e (diff) |
cluster/tier: add pause tier for snapshots
Snaps of tiered volumes cannot handle files undergoing migration.
We implement a helper mechanism to "pause" migration. Any files
undergoing migration are aborted. Clean up is done to remove
sticky bits and data at the destination. Migration is restarted
after snap completes.
For testing an internal switch is added. It is not exposed externally.
gluster volume set vol1 tier-pause [true|false]
Change-Id: Ia85bbf89ac142e9b7e73fcbef98bb9da86097799
BUG: 1267950
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: http://review.gluster.org/12304
Reviewed-by: N Balachandran <nbalacha@redhat.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 11 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-messages.h | 19 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 125 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 5 | ||||
-rw-r--r-- | xlators/cluster/dht/src/tier.c | 68 |
6 files changed, 227 insertions, 7 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index e2c9f5547ac..acbe3f40eea 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -7661,10 +7661,14 @@ dht_notify (xlator_t *this, int event, void *data, ...) cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER) gf_defrag_stop (defrag, GF_DEFRAG_STATUS_STOPPED, output); + else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER) + ret = gf_defrag_pause_tier (this, defrag); + else if (cmd == GF_DEFRAG_CMD_RESUME_TIER) + ret = gf_defrag_resume_tier (this, defrag); } unlock: UNLOCK (&defrag->lock); - return 0; + return ret; break; } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 95ca7067806..6483b2e86d7 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -293,7 +293,8 @@ enum gf_defrag_type { GF_DEFRAG_CMD_STATUS_TIER = 1 + 6, GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, - + GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, + GF_DEFRAG_CMD_RESUME_TIER = 1 + 10, }; typedef enum gf_defrag_type gf_defrag_type; @@ -353,6 +354,8 @@ typedef struct gf_tier_conf { int tier_demote_frequency; uint64_t st_last_promoted_size; uint64_t st_last_demoted_size; + int request_pause; + gf_boolean_t paused; } gf_tier_conf_t; struct gf_defrag_info_ { @@ -982,6 +985,12 @@ int gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict); int +gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag); + +int +gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag); + +int gf_defrag_start_detach_tier (gf_defrag_info_t *defrag); int diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 61631e682f8..8960ac738ec 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT -#define GLFS_DHT_NUM_MESSAGES 107 +#define GLFS_DHT_NUM_MESSAGES 109 #define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1) /* Messages with message IDs */ @@ -1002,5 +1002,22 @@ #define DHT_MSG_LOG_IPC_TIER_ERROR (GLFS_DHT_BASE + 107) +/* + * @messageid 109108 + * @diagnosis + * @recommendedaction None + */ + +#define DHT_MSG_TIER_PAUSED (GLFS_DHT_BASE + 108) + +/* + * @messageid 109108 + * @diagnosis + * @recommendedaction None + */ + +#define DHT_MSG_TIER_RESUME (GLFS_DHT_BASE + 109) + + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* _DHT_MESSAGES_H_ */ diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index b3c25ba9ee2..fe648f07e8e 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -727,7 +727,7 @@ out: static int __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, - uint64_t ia_size, int hole_exists) + uint64_t ia_size, int hole_exists) { int ret = 0; int count = 0; @@ -779,6 +779,68 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst return ret; } +static int +__tier_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, + uint64_t ia_size, int hole_exists) +{ + int ret = 0; + int count = 0; + off_t offset = 0; + struct iovec *vector = NULL; + struct iobref *iobref = NULL; + uint64_t total = 0; + size_t read_size = 0; + + /* if file size is '0', no need to enter this loop */ + while (total < ia_size) { + + read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ? + DHT_REBALANCE_BLKSIZE : (ia_size - total)); + + ret = syncop_readv (from, src, read_size, + offset, 0, &vector, &count, &iobref, NULL, + NULL); + if (!ret || (ret < 0)) { + break; + } + + if (hole_exists) + ret = dht_write_with_holes (to, dst, vector, count, + ret, offset, iobref); + else + ret = syncop_writev (to, dst, vector, count, + offset, iobref, 0, NULL, NULL); + if (defrag->tier_conf.request_pause) { + gf_msg ("tier", GF_LOG_INFO, 0, + DHT_MSG_TIER_PAUSED, + "Migrate file paused"); + ret = -1; + } + + if (ret < 0) { + break; + } + offset += ret; + total += ret; + + GF_FREE (vector); + if (iobref) + iobref_unref (iobref); + iobref = NULL; + vector = NULL; + } + if (iobref) + iobref_unref (iobref); + GF_FREE (vector); + + if (ret >= 0) + ret = 0; + else + ret = -1; + + return ret; +} + static int __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, @@ -1251,8 +1313,14 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, /* All I/O happens in this function */ - ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd, - stbuf.ia_size, file_has_holes); + if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { + ret = __tier_migrate_data (defrag, from, to, src_fd, dst_fd, + stbuf.ia_size, file_has_holes); + } else { + ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd, + stbuf.ia_size, file_has_holes); + } + if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, @@ -3415,6 +3483,57 @@ out: } int +gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag) +{ + int poll = 0; + int ret = 0; + int usec_sleep = 100000; /* 1/10th of a sec */ + int poll_max = 15; /* 15 times = wait at most 3/2 sec */ + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) + goto out; + + /* + * Set flag requesting to pause tiering. Wait a finite time for + * tiering to actually stop as indicated by the "paused" boolean, + * before returning success or failure. + */ + defrag->tier_conf.request_pause = 1; + + for (poll = 0; poll < poll_max; poll++) { + if ((defrag->tier_conf.paused == _gf_true) || + (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)) { + goto out; + } + + usleep (usec_sleep); + } + + ret = -1; + +out: + + gf_msg (this->name, GF_LOG_DEBUG, 0, + DHT_MSG_TIER_PAUSED, + "Pause tiering ret=%d", ret); + + return ret; +} + +int +gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag) +{ + gf_msg (this->name, GF_LOG_DEBUG, 0, + DHT_MSG_TIER_RESUME, + "Resume tiering"); + + defrag->tier_conf.request_pause = 0; + defrag->tier_conf.paused = _gf_false; + + return 0; +} + +int gf_defrag_start_detach_tier (gf_defrag_info_t *defrag) { defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 4d700482919..718f497bb03 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -963,6 +963,11 @@ struct volume_options options[] = { }, /* tier options */ + { .key = {"tier-pause"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, + { .key = {"tier-promote-frequency"}, .type = GF_OPTION_TYPE_INT, .default_value = "120", diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index d85fe41dcb0..9dcbc760330 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -307,6 +307,13 @@ tier_migrate_using_query_file (void *_args) per_file_status = 0; per_link_status = 0; + if (defrag->tier_conf.request_pause) { + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_LOG_TIER_STATUS, + "Tiering paused. Exiting tier_migrate_using_query_file"); + break; + } + memset (gfid_str, 0, UUID_CANONICAL_FORM_LEN+1); memset (query_record->_link_info_str, 0, DB_QUERY_RECORD_SIZE); @@ -368,6 +375,14 @@ tier_migrate_using_query_file (void *_args) /* Per link of file */ while (token_str != NULL) { + if (defrag->tier_conf.request_pause) { + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_LOG_TIER_STATUS, + "Tiering paused. " + "Exiting tier_migrate_using_query_file"); + goto abort; + } + link_str = gf_strdup (token_str); if (!link_info) { @@ -485,6 +500,14 @@ tier_migrate_using_query_file (void *_args) gf_uuid_copy (loc.gfid, loc.inode->gfid); + if (defrag->tier_conf.request_pause) { + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_LOG_TIER_STATUS, + "Tiering paused. " + "Exiting tier_migrate_using_query_file"); + goto abort; + } + ret = syncop_setxattr (this, &loc, migrate_data, 0, NULL, NULL); if (ret) { @@ -1347,6 +1370,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) goto out; } + if (defrag->tier_conf.request_pause) + defrag->tier_conf.paused = _gf_true; + else + defrag->tier_conf.paused = _gf_false; + sleep(1); if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { @@ -1368,6 +1396,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) goto out; } + if ((defrag->tier_conf.paused) || + (defrag->tier_conf.request_pause)) + continue; + + /* To have proper synchronization amongst all * brick holding nodes, so that promotion and demotions * start atomicly w.r.t promotion/demotion frequency @@ -1658,6 +1691,7 @@ tier_init (xlator_t *this) gf_defrag_info_t *defrag = NULL; char *voldir = NULL; char *mode = NULL; + char *paused = NULL; ret = dht_init (this); if (ret) { @@ -1776,7 +1810,15 @@ tier_init (xlator_t *this) defrag->tier_conf.mode = ret; } - ret = gf_asprintf (&voldir, "%s/%s", + defrag->tier_conf.request_pause = 0; + + ret = dict_get_str (this->options, + "tier-pause", &paused); + + if (paused && strcmp (paused, "on") == 0) + defrag->tier_conf.request_pause = 1; + + ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name); if (ret < 0) @@ -1844,6 +1886,9 @@ tier_reconfigure (xlator_t *this, dict_t *options) gf_defrag_info_t *defrag = NULL; char *mode = NULL; int migrate_mb = 0; + gf_boolean_t req_pause = _gf_false; + int ret = 0; + conf = this->private; if (conf->defrag) { @@ -1885,6 +1930,27 @@ tier_reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("tier-max-files", defrag->tier_conf.max_migrate_files, options, int32, out); + + GF_OPTION_RECONF ("tier-pause", + req_pause, options, + bool, out); + + if (req_pause == _gf_true) { + ret = gf_defrag_pause_tier (this, defrag); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_LOG_TIER_ERROR, + "pause tier failed on reconfigure"); + } + } else { + ret = gf_defrag_resume_tier (this, defrag); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_LOG_TIER_ERROR, + "resume tier failed on reconfigure"); + } + } + } out: |